diff --git a/.clang-format b/.clang-format
index 2e606ba4bb..1defc175de 100644
--- a/.clang-format
+++ b/.clang-format
@@ -2,7 +2,8 @@
BasedOnStyle: Google
IndentWidth: 2
-ContinuationIndentWidth: 2
+ColumnLimit: 80
+ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
@@ -34,4 +35,5 @@ BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false
-IndentCaseLabels: true
\ No newline at end of file
+IndentCaseLabels: true
+
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000000..df06a0e5fb
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,24 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Description**
+A clear and concise description of what the bug is.
+
+**Triton Information**
+What version of Triton are you using?
+
+Are you using the Triton container or did you build it yourself?
+
+**To Reproduce**
+Steps to reproduce the behavior.
+
+Describe the models (framework, inputs, outputs), and ideally include the model configuration file (if using an ensemble, include the model configuration file for that as well).
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000000..bbcbbe7d61
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
new file mode 100644
index 0000000000..745a33730b
--- /dev/null
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,84 @@
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+name: "CodeQL"
+
+on:
+ pull_request:
+
+jobs:
+ analyze:
+ name: Analyze
+ runs-on: ubuntu-latest
+ permissions:
+ actions: read
+ contents: read
+ security-events: write
+
+ strategy:
+ fail-fast: false
+ matrix:
+ language: [ 'python' ]
+ # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+ # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v3
+
+ # Initializes the CodeQL tools for scanning.
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v2
+ with:
+ languages: ${{ matrix.language }}
+ # If you wish to specify custom queries, you can do so here or in a config file.
+ # By default, queries listed here will override any specified in a config file.
+ # Prefix the list here with "+" to use these queries and those in the config file.
+
+        # For details on CodeQL's query packs, refer to:
+ # https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+ queries: +security-and-quality
+
+
+ # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
+ # If this step fails, then you should remove it and run the build manually (see below)
+ - name: Autobuild
+ uses: github/codeql-action/autobuild@v2
+
+ # Command-line programs to run using the OS shell.
+ # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+
+ # If the Autobuild fails above, remove it and uncomment the following three lines.
+    # Modify them (or add more) to build your code; refer to the commented example below for guidance.
+
+ # - run: |
+ # echo "Run, Build Application using script"
+ # ./location_of_script_within_repo/buildscript.sh
+
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v2
+ with:
+ category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml
new file mode 100644
index 0000000000..531cc2911b
--- /dev/null
+++ b/.github/workflows/pre-commit.yaml
@@ -0,0 +1,39 @@
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+name: pre-commit
+
+on:
+ pull_request:
+
+jobs:
+ pre-commit:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v3
+ - uses: pre-commit/action@v3.0.0
+
diff --git a/.gitignore b/.gitignore
index 4e1f8ef0cc..f1b69cb25e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,8 @@
-/bazel-bin
-/bazel-ci_build-cache
-/bazel-genfiles
-/bazel-trtserver
-/bazel-out
-/bazel-serving
-/bazel-tensorflow
-/bazel-tensorflow_serving
-/bazel-testlogs
-/bazel-tf
-/bazel-workspace
+/build
+/builddir
+/.vscode
+*.so
+__pycache__
+tmp
+*.log
+test_results.txt
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000..f44f815351
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,74 @@
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+repos:
+- repo: https://github.com/timothycrosley/isort
+ rev: 5.12.0
+ hooks:
+ - id: isort
+ additional_dependencies: [toml]
+- repo: https://github.com/psf/black
+ rev: 23.1.0
+ hooks:
+ - id: black
+ types_or: [python, cython]
+- repo: https://github.com/PyCQA/flake8
+ rev: 5.0.4
+ hooks:
+ - id: flake8
+ args: [--max-line-length=88, --select=C,E,F,W,B,B950, --extend-ignore = E203,E501]
+ types_or: [python, cython]
+- repo: https://github.com/pre-commit/mirrors-clang-format
+ rev: v16.0.5
+ hooks:
+ - id: clang-format
+ types_or: [c, c++, cuda, proto, textproto, java]
+ args: ["-fallback-style=none", "-style=file", "-i"]
+- repo: https://github.com/codespell-project/codespell
+ rev: v2.2.4
+ hooks:
+ - id: codespell
+ additional_dependencies: [tomli]
+ args: ["--toml", "pyproject.toml"]
+ exclude: (?x)^(.*stemmer.*|.*stop_words.*|^CHANGELOG.md$)
+# More details about these pre-commit hooks are available at:
+# https://pre-commit.com/hooks.html
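+# To try a single hook locally, pre-commit can run it by id across the
+# whole tree (illustrative invocation):
+#   pre-commit run clang-format --all-files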
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.4.0
+ hooks:
+ - id: check-case-conflict
+ - id: check-executables-have-shebangs
+ - id: check-merge-conflict
+ - id: check-json
+ - id: check-toml
+ - id: check-yaml
+ exclude: ^deploy(\/[^\/]+)*\/templates\/.*$
+ - id: check-shebang-scripts-are-executable
+ - id: end-of-file-fixer
+ types_or: [c, c++, cuda, proto, textproto, java, python]
+ - id: mixed-line-ending
+ - id: requirements-txt-fixer
+ - id: trailing-whitespace
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 0000000000..f8fb8d09fb
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,7 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+title: "Triton Inference Server: An Optimized Cloud and Edge Inferencing Solution."
+url: https://github.com/triton-inference-server
+repository-code: https://github.com/triton-inference-server/server
+authors:
+ - name: "NVIDIA Corporation"
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000..ff578c9724
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,273 @@
+# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+cmake_minimum_required(VERSION 3.18)
+
+project(tritonserver LANGUAGES C CXX)
+
+include(CMakeDependentOption)
+
+# Use the C++17 standard as Triton's minimum requirement.
+set(TRITON_MIN_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard whose features are required to build this target.")
+
+set(TRITON_VERSION "0.0.0" CACHE STRING "The version of the Triton shared library" )
+
+option(TRITON_ENABLE_LOGGING "Include logging support in server" ON)
+option(TRITON_ENABLE_STATS "Include statistics collections in server" ON)
+option(TRITON_ENABLE_TRACING "Include tracing support in server" OFF)
+option(TRITON_ENABLE_NVTX "Include NVTX support in server" OFF)
+option(TRITON_ENABLE_GPU "Enable GPU support in server" ON)
+option(TRITON_ENABLE_MALI_GPU "Enable Arm Mali GPU support in server" OFF)
+option(TRITON_IGPU_BUILD "Enable options for iGPU compilation in server" OFF)
+set(TRITON_MIN_COMPUTE_CAPABILITY "6.0" CACHE STRING
+ "The minimum CUDA compute capability supported by Triton" )
+set(TRITON_EXTRA_LIB_PATHS "" CACHE PATH "Extra library paths for Triton Server build")
+
+# Ensemble
+option(TRITON_ENABLE_ENSEMBLE "Include ensemble support in server" OFF)
+
+# Endpoints
+option(TRITON_ENABLE_HTTP "Include HTTP API in server" ON)
+option(TRITON_ENABLE_GRPC "Include GRPC API in server" ON)
+option(TRITON_ENABLE_SAGEMAKER "Include AWS SageMaker API in server" OFF)
+option(TRITON_ENABLE_VERTEX_AI "Include Vertex AI API in server" OFF)
+
+# Metrics
+option(TRITON_ENABLE_METRICS "Include metrics support in server" ON)
+option(TRITON_ENABLE_METRICS_GPU "Include GPU metrics support in server" ON)
+option(TRITON_ENABLE_METRICS_CPU "Include CPU metrics support in server" ON)
+
+# Cloud storage
+option(TRITON_ENABLE_GCS "Include GCS Filesystem support in server" OFF)
+option(TRITON_ENABLE_S3 "Include S3 Filesystem support in server" OFF)
+option(TRITON_ENABLE_AZURE_STORAGE "Include Azure Storage Filesystem support in server" OFF)
+
+# Need to know if TensorRT is available when building unit tests
+option(TRITON_ENABLE_TENSORRT "Include TensorRT backend in server" OFF)
+
+# ASAN
+option(TRITON_ENABLE_ASAN "Build with address sanitizer" OFF)
+
+# Repo tags
+set(TRITON_REPO_ORGANIZATION "https://github.com/triton-inference-server" CACHE STRING "Git repository to pull from")
+set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING
+ "Tag for triton-inference-server/third_party repo")
+set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
+set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
+set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
+
+# Third-party location
+set(TRITON_THIRD_PARTY_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party" CACHE STRING "Location of third-party build")
+set(TRITON_THIRD_PARTY_SRC_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party-src" CACHE STRING "Location of third-party source")
+
+if(TRITON_ENABLE_METRICS AND NOT TRITON_ENABLE_STATS)
+ message(FATAL_ERROR "TRITON_ENABLE_METRICS=ON requires TRITON_ENABLE_STATS=ON")
+endif()
+
+if(TRITON_ENABLE_TRACING AND NOT TRITON_ENABLE_STATS)
+ message(FATAL_ERROR "TRITON_ENABLE_TRACING=ON requires TRITON_ENABLE_STATS=ON")
+endif()
+
+if (TRITON_ENABLE_METRICS_CPU AND NOT TRITON_ENABLE_METRICS)
+ message(FATAL_ERROR "TRITON_ENABLE_METRICS_CPU=ON requires TRITON_ENABLE_METRICS=ON")
+endif()
+
+if (TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_METRICS)
+ message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_METRICS=ON")
+endif()
+
+if (TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_GPU)
+ message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_GPU=ON")
+endif()
+
+if(TRITON_ENABLE_ASAN AND TRITON_ENABLE_GPU)
+ message(FATAL_ERROR "TRITON_ENABLE_ASAN=ON requires TRITON_ENABLE_GPU=OFF")
+endif()
+
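+# A typical configure-and-build invocation (a sketch only; the option
+# values shown are illustrative, not required):
+#
+#   cmake -S . -B build \
+#     -DCMAKE_BUILD_TYPE=Release \
+#     -DTRITON_ENABLE_GRPC=ON \
+#     -DTRITON_ENABLE_HTTP=ON \
+#     -DTRITON_ENABLE_GPU=ON
+#   cmake --build build --parallel
+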
+#
+# Dependencies
+#
+include(FetchContent)
+
+FetchContent_Declare(
+ repo-core
+ GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git
+ GIT_TAG ${TRITON_CORE_REPO_TAG}
+)
+FetchContent_Declare(
+ repo-third-party
+ GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/third_party.git
+ GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG}
+)
+
+# Some libs are installed to ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib64 instead
+# of ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib on CentOS
+set (LIB_DIR "lib")
+# /etc/os-release does not exist on Windows
+if(EXISTS "/etc/os-release")
+ file(STRINGS /etc/os-release DISTRO REGEX "^NAME=")
+ string(REGEX REPLACE "NAME=\"(.*)\"" "\\1" DISTRO "${DISTRO}")
+ message(STATUS "Distro Name: ${DISTRO}")
+ if(DISTRO MATCHES "CentOS.*")
+ set (LIB_DIR "lib64")
+ endif()
+endif()
+
+set(TRITON_CORE_HEADERS_ONLY OFF)
+
+FetchContent_MakeAvailable(repo-third-party repo-core)
+
+#
+# Triton server executable and examples
+#
+
+# Need to use ExternalProject for our builds so that we can get the
+# correct dependencies between the Triton executable and the
+# ExternalProject dependencies (found in the third_party repo)
+include(ExternalProject)
+
+# If CMAKE_TOOLCHAIN_FILE is set, propagate that hint path to the external
+# projects.
+set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "")
+if (CMAKE_TOOLCHAIN_FILE)
+ set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}")
+endif()
+
+# If VCPKG_TARGET_TRIPLET is set, propagate that hint path to the external
+# projects.
+set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "")
+if (VCPKG_TARGET_TRIPLET)
+ set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}")
+endif()
+
+# If OPENSSL_ROOT_DIR is set, propagate that hint path to the external
+# projects with OpenSSL dependency.
+set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
+if (OPENSSL_ROOT_DIR)
+ set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
+endif()
+
+# Location where protobuf-config.cmake will be installed varies by
+# platform
+if (WIN32)
+ set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake")
+else()
+ set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/${LIB_DIR}/cmake/protobuf")
+endif()
+
+# Triton with OpenTelemetry is not supported on Windows
+# FIXME: add location for Windows when support is added
+# JIRA DLIS-4786
+if (WIN32)
+ set(_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR "")
+else()
+ set(_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/opentelemetry-cpp/${LIB_DIR}/cmake/opentelemetry-cpp")
+endif()
+
+if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+ set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install)
+else()
+ set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
+endif()
+
+set(TRITON_DEPENDS triton-core protobuf googletest re2)
+if(${TRITON_ENABLE_GCS})
+ set(TRITON_DEPENDS ${TRITON_DEPENDS} google-cloud-cpp)
+endif() # TRITON_ENABLE_GCS
+if(${TRITON_ENABLE_S3})
+ set(TRITON_DEPENDS ${TRITON_DEPENDS} aws-sdk-cpp)
+endif() # TRITON_ENABLE_S3
+if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR ${TRITON_ENABLE_SAGEMAKER} OR ${TRITON_ENABLE_VERTEX_AI})
+ set(TRITON_DEPENDS ${TRITON_DEPENDS} libevent libevhtp)
+endif() # TRITON_ENABLE_HTTP || TRITON_ENABLE_METRICS || TRITON_ENABLE_SAGEMAKER || TRITON_ENABLE_VERTEX_AI
+if(${TRITON_ENABLE_GRPC})
+ set(TRITON_DEPENDS ${TRITON_DEPENDS} grpc)
+endif() # TRITON_ENABLE_GRPC
+if(NOT WIN32 AND ${TRITON_ENABLE_TRACING})
+ set(TRITON_DEPENDS ${TRITON_DEPENDS} opentelemetry-cpp)
+endif() # TRITON_ENABLE_TRACING
+
+ExternalProject_Add(triton-server
+ PREFIX triton-server
+ SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src"
+ BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/triton-server"
+ CMAKE_CACHE_ARGS
+ -DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
+ ${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
+ ${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
+ ${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
+ -DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest
+ -DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
+ -Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares
+ -Dre2_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/re2/${LIB_DIR}/cmake/re2
+ -Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl
+ -DCURL_DIR:STRING=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/curl/${LIB_DIR}/cmake/CURL
+ -Dnlohmann_json_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/nlohmann_json/${LIB_DIR}/cmake/nlohmann_json
+ -DLibevent_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/libevent/lib/cmake/libevent
+ -Dlibevhtp_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/libevhtp/lib/cmake/libevhtp
+ -Dstorage_client_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/${LIB_DIR}/cmake/storage_client
+ -Dgoogle_cloud_cpp_common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/${LIB_DIR}/cmake/google_cloud_cpp_common
+ -DCrc32c_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/crc32c/${LIB_DIR}/cmake/Crc32c
+ -DAWSSDK_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/AWSSDK
+ -Daws-cpp-sdk-core_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/aws-cpp-sdk-core
+ -Daws-cpp-sdk-s3_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/aws-cpp-sdk-s3
+ -Daws-c-event-stream_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-c-event-stream/cmake
+ -Daws-c-common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-c-common/cmake
+ -Daws-checksums_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-checksums/cmake
+ -Dopentelemetry-cpp_DIR:PATH=${_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR}
+ -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION}
+ -DTRITON_IGPU_BUILD:BOOL=${TRITON_IGPU_BUILD}
+ -DTRITON_THIRD_PARTY_REPO_TAG:STRING=${TRITON_THIRD_PARTY_REPO_TAG}
+ -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
+ -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG}
+ -DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG}
+ -DTRITON_EXTRA_LIB_PATHS:PATH=${TRITON_EXTRA_LIB_PATHS}
+ -DTRITON_ENABLE_ASAN:BOOL=${TRITON_ENABLE_ASAN}
+ -DTRITON_ENABLE_NVTX:BOOL=${TRITON_ENABLE_NVTX}
+ -DTRITON_ENABLE_TRACING:BOOL=${TRITON_ENABLE_TRACING}
+ -DTRITON_ENABLE_LOGGING:BOOL=${TRITON_ENABLE_LOGGING}
+ -DTRITON_ENABLE_STATS:BOOL=${TRITON_ENABLE_STATS}
+ -DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
+ -DTRITON_ENABLE_MALI_GPU:BOOL=${TRITON_ENABLE_MALI_GPU}
+ -DTRITON_ENABLE_HTTP:BOOL=${TRITON_ENABLE_HTTP}
+ -DTRITON_ENABLE_SAGEMAKER:BOOL=${TRITON_ENABLE_SAGEMAKER}
+ -DTRITON_ENABLE_VERTEX_AI:BOOL=${TRITON_ENABLE_VERTEX_AI}
+ -DTRITON_ENABLE_GRPC:BOOL=${TRITON_ENABLE_GRPC}
+ -DTRITON_MIN_COMPUTE_CAPABILITY:STRING=${TRITON_MIN_COMPUTE_CAPABILITY}
+ -DTRITON_ENABLE_METRICS:BOOL=${TRITON_ENABLE_METRICS}
+ -DTRITON_ENABLE_METRICS_GPU:BOOL=${TRITON_ENABLE_METRICS_GPU}
+ -DTRITON_ENABLE_METRICS_CPU:BOOL=${TRITON_ENABLE_METRICS_CPU}
+ -DTRITON_ENABLE_GCS:BOOL=${TRITON_ENABLE_GCS}
+ -DTRITON_ENABLE_AZURE_STORAGE:BOOL=${TRITON_ENABLE_AZURE_STORAGE}
+ -DTRITON_ENABLE_S3:BOOL=${TRITON_ENABLE_S3}
+ -DTRITON_ENABLE_TENSORRT:BOOL=${TRITON_ENABLE_TENSORRT}
+ -DTRITON_ENABLE_ENSEMBLE:BOOL=${TRITON_ENABLE_ENSEMBLE}
+ -DTRITON_MIN_CXX_STANDARD:STRING=${TRITON_MIN_CXX_STANDARD}
+ -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
+ -DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
+ -DTRITON_VERSION:STRING=${TRITON_VERSION}
+ DEPENDS ${TRITON_DEPENDS}
+)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 84be37f175..59e0ace975 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,5 +1,5 @@
+# Contribution Guidelines
+
+Contributions that fix documentation errors or that make small changes
+to existing code can be submitted directly by following the rules
+below and opening an appropriate PR.
+
+Contributions intended to add significant new functionality must
+follow a more collaborative path described in the following
+points. Before submitting a large PR that adds a major enhancement or
+extension, be sure to submit a GitHub issue that describes the
+proposed change so that the Triton team can provide feedback.
+
+- As part of the GitHub issue discussion, a design for your change
+ will be agreed upon. An up-front design discussion is required to
+ ensure that your enhancement is done in a manner that is consistent
+ with Triton's overall architecture.
+
+- The Triton project is spread across multiple repos. The Triton team
+ will provide guidance about how and where your enhancement should be
+ implemented.
+
+- [Testing](docs/customization_guide/test.md) is a critical part of any Triton
+ enhancement. You should plan on spending significant time on
+ creating tests for your change. The Triton team will help you to
+ design your testing so that it is compatible with existing testing
+ infrastructure.
+
+- If your enhancement provides a user-visible feature, then you need to
+ provide documentation.
+
# Contribution Rules
-- The code style convention is enforced by clang-format. See the
- Developer Guide for instructions on how to ensure your contributions
- conform. In general please follow the existing conventions in the
- relevant file, submodule, module, and project when you add new code
- or when you extend/fix existing functionality.
+- The code style convention is enforced by clang-format. See below on
+  how to ensure your contributions conform. In general, please follow
+ the existing conventions in the relevant file, submodule, module,
+ and project when you add new code or when you extend/fix existing
+ functionality.
- Avoid introducing unnecessary complexity into existing code so that
maintainability and readability are preserved.
@@ -54,10 +84,10 @@
- Make sure all `L0_*` tests pass:
- In the `qa/` directory, there are basic sanity tests scripted in
- directories named `L0_...`. See the Testing section in the
- Developer Guide for instructions on running these tests.
+ directories named `L0_...`. See the [Test](docs/customization_guide/test.md)
+ documentation for instructions on running these tests.
-- TensorRT Inference Server's default build assumes recent versions of
+- Triton Inference Server's default build assumes recent versions of
dependencies (CUDA, TensorFlow, PyTorch, TensorRT,
etc.). Contributions that add compatibility with older versions of
those dependencies will be considered, but NVIDIA cannot guarantee
@@ -66,64 +96,32 @@
- Make sure that you can contribute your work to open source (no
license and/or patent conflict is introduced by your code). You need
- to [`sign`](#Sign) your commit.
+ to complete the CLA described below before your PR can be merged.
- Thanks in advance for your patience as we review your contributions;
we do appreciate them!
-Sign Your Work
---------------
-
-We require that all contributors "sign-off" on their commits. This
-certifies that the contribution is your original work, or you have
-rights to submit it under the same license, or a compatible license.
-
-Any contribution which contains commits that are not Signed-Off will
-not be accepted.
-
-To sign off on a commit you simply use the `--signoff` (or `-s`)
-option when committing your changes:
-
- $ git commit -s -m "Add cool feature."
-
-This will append the following to your commit message:
-
- Signed-off-by: Your Name
-
-By doing this you certify the below:
-
- Developer Certificate of Origin
- Version 1.1
-
- Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
- 1 Letterman Drive
- Suite D4700
- San Francisco, CA, 94129
-
- Everyone is permitted to copy and distribute verbatim copies of
- this license document, but changing it is not allowed.
-
-
- Developer's Certificate of Origin 1.1
-
- By making a contribution to this project, I certify that:
-
- (a) The contribution was created in whole or in part by me and I
- have the right to submit it under the open source license
- indicated in the file; or
-
- (b) The contribution is based upon previous work that, to the best
- of my knowledge, is covered under an appropriate open source
- license and I have the right under that license to submit that
- work with modifications, whether created in whole or in part by
- me, under the same open source license (unless I am permitted to
- submit under a different license), as indicated in the file; or
-
- (c) The contribution was provided directly to me by some other
- person who certified (a), (b) or (c) and I have not modified it.
-
- (d) I understand and agree that this project and the contribution
- are public and that a record of the contribution (including all
- personal information I submit with it, including my sign-off) is
- maintained indefinitely and may be redistributed consistent with
- this project or the open source license(s) involved.
+# Coding Convention
+
+All pull requests are checked against the
+[pre-commit hooks](https://github.com/pre-commit/pre-commit-hooks)
+located [in the repository's top-level .pre-commit-config.yaml](https://github.com/NVIDIA/triton-inference-server/blob/master/.pre-commit-config.yaml).
+The hooks do some sanity checking like linting and formatting.
+These checks must pass to merge a change.
+
+To run these locally, you can
+[install pre-commit](https://pre-commit.com/#install),
+then run `pre-commit install` inside the cloned repo. When you
+commit a change, the pre-commit hooks will run automatically.
+If a hook applies a fix, stage the modified files again and run
+`git commit` a second time; that second commit will pass.
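+
+As a sketch, a typical local workflow (the commit message below is
+illustrative) looks like:
+
+    $ pip install pre-commit
+    $ pre-commit install
+    $ git commit -m "Add cool feature."   # hooks run automatically
+    $ git add -u                          # re-stage files a hook may have fixed
+    $ git commit -m "Add cool feature."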
+
+# Contributor License Agreement (CLA)
+
+Triton requires that all contributors (or their corporate entity) send
+a signed copy of the [Contributor License
+Agreement](https://github.com/NVIDIA/triton-inference-server/blob/master/Triton-CCLA-v1.pdf)
+to triton-cla@nvidia.com.
+*NOTE*: Contributors with no company affiliation can fill `N/A` in the
+`Corporation Name` and `Corporation Address` fields.
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index ff42f64f4a..0000000000
--- a/Dockerfile
+++ /dev/null
@@ -1,269 +0,0 @@
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#
-# Multistage build.
-#
-
-ARG BASE_IMAGE=nvcr.io/nvidia/tensorrtserver:18.11-py3
-ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:18.11-py3
-ARG TENSORFLOW_IMAGE=nvcr.io/nvidia/tensorflow:18.11-py3
-
-############################################################################
-## Caffe2 stage: Use PyTorch container to get Caffe2 backend
-############################################################################
-FROM ${PYTORCH_IMAGE} AS trtserver_caffe2
-
-ARG BUILD_CLIENTS_ONLY=0
-
-# We cannot just pull libraries from the PyTorch container... we need
-# to:
-# - copy over netdef_bundle_c2 interface so it can build with other
-# C2 sources
-# - need to patch to delegate logging to the inference server.
-
-# Copy netdef_bundle_c2 into Caffe2 core so it builds into the
-# libcaffe2 library. We want netdef_bundle_c2 to build against the
-# Caffe2 protobuf since it interfaces with that code.
-COPY src/servables/caffe2/netdef_bundle_c2.* \
- /opt/pytorch/pytorch/caffe2/core/
-
-# Modify the C2 logging library to delegate logging to the trtserver
-# logger. Use a checksum to detect if the C2 logging file has
-# changed... if it has need to verify our patch is still valid and
-# update the patch/checksum as necessary.
-COPY tools/patch/caffe2 /tmp/patch/caffe2
-RUN sha1sum -c /tmp/patch/caffe2/checksums && \
- patch -i /tmp/patch/caffe2/core/logging.cc \
- /opt/pytorch/pytorch/caffe2/core/logging.cc && \
- patch -i /tmp/patch/caffe2/core/logging_is_not_google_glog.h \
- /opt/pytorch/pytorch/caffe2/core/logging_is_not_google_glog.h && \
- patch -i /tmp/patch/caffe2/core/context_gpu.cu \
- /opt/pytorch/pytorch/caffe2/core/context_gpu.cu
-
-# Build same as in pytorch container... except for the NO_DISTRIBUTED
-# line where we turn off features not needed for trtserver
-WORKDIR /opt/pytorch
-RUN pip uninstall -y torch
-RUN bash -c 'if [ "$BUILD_CLIENTS_ONLY" != "1" ]; then \
- cd pytorch && \
- TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5+PTX" \
- CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
- NCCL_INCLUDE_DIR="/usr/include/" \
- NCCL_LIB_DIR="/usr/lib/" \
- NO_DISTRIBUTED=1 NO_TEST=1 NO_MIOPEN=1 USE_OPENCV=OFF USE_LEVELDB=OFF \
- python setup.py install && python setup.py clean; \
- else \
- mkdir -p /opt/conda/lib/python3.6/site-packages/torch/lib; \
- mkdir -p /opt/conda/lib; \
- touch /opt/conda/lib/python3.6/site-packages/torch/lib/libcaffe2_detectron_ops_gpu.so; \
- touch /opt/conda/lib/python3.6/site-packages/torch/lib/libcaffe2.so; \
- touch /opt/conda/lib/python3.6/site-packages/torch/lib/libcaffe2_gpu.so; \
- touch /opt/conda/lib/python3.6/site-packages/torch/lib/libc10.so; \
- touch /opt/conda/lib/libmkl_avx2.so; \
- touch /opt/conda/lib/libmkl_core.so; \
- touch /opt/conda/lib/libmkl_def.so; \
- touch /opt/conda/lib/libmkl_gnu_thread.so; \
- touch /opt/conda/lib/libmkl_intel_lp64.so; fi'
-
-############################################################################
-## Build stage: Build inference server based on TensorFlow container
-############################################################################
-FROM ${TENSORFLOW_IMAGE} AS trtserver_build
-
-ARG TRTIS_VERSION=0.10.0dev
-ARG TRTIS_CONTAINER_VERSION=19.01dev
-ARG PYVER=3.5
-ARG BUILD_CLIENTS_ONLY=0
-
-# The TFServing release branch must match the TF release used by
-# TENSORFLOW_IMAGE
-ARG TFS_BRANCH=r1.12
-
-RUN apt-get update && \
- apt-get install -y --no-install-recommends \
- automake \
- libcurl3-dev \
- libopencv-dev \
- libopencv-core-dev \
- libtool
-
-RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
- python$PYVER get-pip.py && \
- rm get-pip.py
-
-RUN pip install --upgrade setuptools
-
-# Caffe2 library requirements...
-COPY --from=trtserver_caffe2 \
- /opt/conda/lib/python3.6/site-packages/torch/lib/libcaffe2_detectron_ops_gpu.so \
- /opt/tensorrtserver/lib/
-COPY --from=trtserver_caffe2 \
- /opt/conda/lib/python3.6/site-packages/torch/lib/libcaffe2.so \
- /opt/tensorrtserver/lib/
-COPY --from=trtserver_caffe2 \
- /opt/conda/lib/python3.6/site-packages/torch/lib/libcaffe2_gpu.so \
- /opt/tensorrtserver/lib/
-COPY --from=trtserver_caffe2 \
- /opt/conda/lib/python3.6/site-packages/torch/lib/libc10.so \
- /opt/tensorrtserver/lib/
-COPY --from=trtserver_caffe2 /opt/conda/lib/libmkl_avx2.so /opt/tensorrtserver/lib/
-COPY --from=trtserver_caffe2 /opt/conda/lib/libmkl_core.so /opt/tensorrtserver/lib/
-COPY --from=trtserver_caffe2 /opt/conda/lib/libmkl_def.so /opt/tensorrtserver/lib/
-COPY --from=trtserver_caffe2 /opt/conda/lib/libmkl_gnu_thread.so /opt/tensorrtserver/lib/
-COPY --from=trtserver_caffe2 /opt/conda/lib/libmkl_intel_lp64.so /opt/tensorrtserver/lib/
-
-# Copy entire repo into container even though some is not needed for
-# build itself... because we want to be able to copyright check on
-# files that aren't directly needed for build.
-WORKDIR /workspace
-RUN rm -fr *
-COPY . .
-
-# Pull the TFS release that matches the version of TF being used.
-RUN git clone --single-branch -b ${TFS_BRANCH} https://github.com/tensorflow/serving.git
-
-# Modify the TF logging library to delegate logging to the trtserver
-# logger. Use a checksum to detect if the TF logging file has
-# changed... if it has need to verify our patch is still valid and
-# update the patch/checksum as necessary.
-RUN sha1sum -c tools/patch/tensorflow/checksums && \
- patch -i tools/patch/tensorflow/cc/saved_model/loader.cc \
- /opt/tensorflow/tensorflow/cc/saved_model/loader.cc && \
- patch -i tools/patch/tensorflow/core/platform/default/logging.cc \
- /opt/tensorflow/tensorflow/core/platform/default/logging.cc
-
-# TFS modifications. Use a checksum to detect if the TFS file has
-# changed... if it has need to verify our patch is still valid and
-# update the patch/checksum as necessary.
-RUN sha1sum -c tools/patch/tfs/checksums && \
- patch -i tools/patch/tfs/model_servers/server_core.cc \
- /workspace/serving/tensorflow_serving/model_servers/server_core.cc && \
- patch -i tools/patch/tfs/sources/storage_path/file_system_storage_path_source.cc \
- /workspace/serving/tensorflow_serving/sources/storage_path/file_system_storage_path_source.cc && \
- patch -i tools/patch/tfs/sources/storage_path/file_system_storage_path_source.h \
- /workspace/serving/tensorflow_serving/sources/storage_path/file_system_storage_path_source.h && \
- patch -i tools/patch/tfs/sources/storage_path/file_system_storage_path_source.proto \
- /workspace/serving/tensorflow_serving/sources/storage_path/file_system_storage_path_source.proto && \
- patch -i tools/patch/tfs/util/retrier.cc \
- /workspace/serving/tensorflow_serving/util/retrier.cc && \
- patch -i tools/patch/tfs/util/BUILD \
- /workspace/serving/tensorflow_serving/util/BUILD && \
- patch -i tools/patch/tfs/util/net_http/server/internal/evhttp_request.cc \
- /workspace/serving/tensorflow_serving/util/net_http/server/internal/evhttp_request.cc && \
- patch -i tools/patch/tfs/util/net_http/server/internal/evhttp_request.h \
- /workspace/serving/tensorflow_serving/util/net_http/server/internal/evhttp_request.h && \
- patch -i tools/patch/tfs/util/net_http/server/public/BUILD \
- /workspace/serving/tensorflow_serving/util/net_http/server/public/BUILD && \
- patch -i tools/patch/tfs/util/net_http/server/public/server_request_interface.h \
- /workspace/serving/tensorflow_serving/util/net_http/server/public/server_request_interface.h && \
- patch -i tools/patch/tfs/workspace.bzl \
- /workspace/serving/tensorflow_serving/workspace.bzl
-
-ENV TF_NEED_GCP 1
-ENV TF_NEED_S3 1
-
-# Build the server, clients and any testing artifacts
-RUN (cd /opt/tensorflow && ./nvbuild.sh --python$PYVER --configonly) && \
- (cd tools && mv bazel.rc bazel.orig && \
- cat bazel.orig /opt/tensorflow/.tf_configure.bazelrc > bazel.rc) && \
- bash -c 'if [ "$BUILD_CLIENTS_ONLY" != "1" ]; then \
- bazel build -c opt --config=cuda src/servers/trtserver src/clients/... src/test/...; \
- else \
- bazel build -c opt src/clients/...; \
- fi' && \
- (cd /opt/tensorrtserver && ln -s /workspace/qa qa) && \
- mkdir -p /opt/tensorrtserver/bin && \
- cp bazel-bin/src/clients/c++/image_client /opt/tensorrtserver/bin/. && \
- cp bazel-bin/src/clients/c++/perf_client /opt/tensorrtserver/bin/. && \
- cp bazel-bin/src/clients/c++/simple_client /opt/tensorrtserver/bin/. && \
- mkdir -p /opt/tensorrtserver/lib && \
- cp bazel-bin/src/clients/c++/librequest.so /opt/tensorrtserver/lib/. && \
- cp bazel-bin/src/clients/c++/librequest.a /opt/tensorrtserver/lib/. && \
- mkdir -p /opt/tensorrtserver/pip && \
- bazel-bin/src/clients/python/build_pip /opt/tensorrtserver/pip/. && \
- bash -c 'if [ "$BUILD_CLIENTS_ONLY" != "1" ]; then \
- cp bazel-bin/src/servers/trtserver /opt/tensorrtserver/bin/.; \
- cp bazel-bin/src/test/caffe2plan /opt/tensorrtserver/bin/.; \
- fi' && \
- bazel clean --expunge && \
- rm -rf /root/.cache/bazel && \
- rm -rf /tmp/*
-
-ENV TENSORRT_SERVER_VERSION ${TRTIS_VERSION}
-ENV NVIDIA_TENSORRT_SERVER_VERSION ${TRTIS_CONTAINER_VERSION}
-ENV PYVER ${PYVER}
-
-COPY nvidia_entrypoint.sh /opt/tensorrtserver
-ENTRYPOINT ["/opt/tensorrtserver/nvidia_entrypoint.sh"]
-
-############################################################################
-## Production stage: Create container with just inference server executable
-############################################################################
-FROM ${BASE_IMAGE}
-
-ARG TRTIS_VERSION=0.10.0dev
-ARG TRTIS_CONTAINER_VERSION=19.01dev
-ARG PYVER=3.5
-
-ENV TENSORRT_SERVER_VERSION ${TRTIS_VERSION}
-ENV NVIDIA_TENSORRT_SERVER_VERSION ${TRTIS_CONTAINER_VERSION}
-LABEL com.nvidia.tensorrtserver.version="${TENSORRT_SERVER_VERSION}"
-
-ENV LD_LIBRARY_PATH /opt/tensorrtserver/lib:${LD_LIBRARY_PATH}
-ENV PATH /opt/tensorrtserver/bin:${PATH}
-ENV PYVER ${PYVER}
-
-ENV TF_ADJUST_HUE_FUSED 1
-ENV TF_ADJUST_SATURATION_FUSED 1
-ENV TF_ENABLE_WINOGRAD_NONFUSED 1
-ENV TF_AUTOTUNE_THRESHOLD 2
-
-# Create a user that can be used to run the tensorrt-server as
-# non-root. Make sure that this user to given ID 1000.
-ENV TENSORRT_SERVER_USER=tensorrt-server
-RUN id -u $TENSORRT_SERVER_USER > /dev/null 2>&1 || \
- useradd $TENSORRT_SERVER_USER && \
- [ `id -u $TENSORRT_SERVER_USER` -eq 1000 ] && \
- [ `id -g $TENSORRT_SERVER_USER` -eq 1000 ]
-
-WORKDIR /opt/tensorrtserver
-RUN rm -fr /opt/tensorrtserver/*
-COPY LICENSE .
-COPY --from=trtserver_build /workspace/serving/LICENSE LICENSE.tfserving
-COPY --from=trtserver_build /opt/tensorflow/LICENSE LICENSE.tensorflow
-COPY --from=trtserver_caffe2 /opt/pytorch/pytorch/LICENSE LICENSE.pytorch
-COPY --from=trtserver_build /opt/tensorrtserver/bin/trtserver bin/
-COPY --from=trtserver_build /opt/tensorrtserver/lib lib
-
-COPY nvidia_entrypoint.sh /opt/tensorrtserver
-ENTRYPOINT ["/opt/tensorrtserver/nvidia_entrypoint.sh"]
-
-ARG NVIDIA_BUILD_ID
-ENV NVIDIA_BUILD_ID ${NVIDIA_BUILD_ID:-}
-LABEL com.nvidia.build.id="${NVIDIA_BUILD_ID}"
-ARG NVIDIA_BUILD_REF
-LABEL com.nvidia.build.ref="${NVIDIA_BUILD_REF}"
diff --git a/Dockerfile.QA b/Dockerfile.QA
index bb607c3ac0..3e986a9400 100644
--- a/Dockerfile.QA
+++ b/Dockerfile.QA
@@ -1,4 +1,4 @@
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -24,72 +24,374 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# Multistage build.
-#
+ARG BASE_IMAGE=tritonserver
+ARG CIBASE_IMAGE=tritonserver_cibase
+ARG SDK_IMAGE=tritonserver_sdk
+ARG TRITON_REPO_ORGANIZATION=http://github.com/triton-inference-server
+ARG TRITON_COMMON_REPO_TAG=main
+ARG TRITON_CORE_REPO_TAG=main
+ARG TRITON_THIRD_PARTY_REPO_TAG=main
+ARG TRITON_BACKEND_REPO_TAG=main
+ARG TRITONTMP_DIR=/tmp
+ARG IGPU_BUILD=0
+
+############################################################################
+## Test artifacts built as part of the tritonserver build are
+## available in CIBASE_IMAGE. Copy these artifacts into the QA area.
+############################################################################
+FROM ${CIBASE_IMAGE} AS cibase
+
+ARG TRITONTMP_DIR
+ARG TRITON_REPO_ORGANIZATION
+ARG TRITON_COMMON_REPO_TAG
+ARG TRITON_CORE_REPO_TAG
+ARG TRITON_THIRD_PARTY_REPO_TAG
+ARG TRITON_BACKEND_REPO_TAG
+ARG IGPU_BUILD
+
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends \
+ build-essential \
+ libarchive-dev \
+ libboost-dev \
+ python3-dev \
+ python3-pip \
+ rapidjson-dev \
+ software-properties-common && \
+ rm -rf /var/lib/apt/lists/*
+
+RUN pip3 install --upgrade pip && \
+ pip3 install --upgrade wheel setuptools
+
+RUN apt update -q=2 \
+ && apt install -y gpg wget \
+ && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
+ && . /etc/os-release \
+ && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
+ && apt-get update -q=2 \
+ && apt-get install -y --no-install-recommends cmake=3.27.7* cmake-data=3.27.7*
+
+# Add inception_graphdef model to example repo
+WORKDIR /workspace/docs/examples/model_repository
+RUN mkdir -p inception_graphdef/1 && \
+ wget -O ${TRITONTMP_DIR}/inception_v3_2016_08_28_frozen.pb.tar.gz \
+ https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz && \
+ (cd ${TRITONTMP_DIR} && tar xzf inception_v3_2016_08_28_frozen.pb.tar.gz) && \
+ mv ${TRITONTMP_DIR}/inception_v3_2016_08_28_frozen.pb inception_graphdef/1/model.graphdef
+
+# Update the qa/ directory with test executables, models, etc.
+WORKDIR /workspace
+RUN mkdir -p qa/common && \
+ cp -r /workspace/src/test/models/repeat_int32 qa/L0_decoupled/models/ && \
+ cp -r /workspace/src/test/models/square_int32 qa/L0_decoupled/models/ && \
+ mkdir qa/L0_simple_example/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_simple_example/models/. && \
+ mkdir qa/L0_simple_go_client/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_simple_go_client/models/. && \
+ mkdir qa/L0_backend_release/simple_models && \
+ cp -r docs/examples/model_repository/simple qa/L0_backend_release/simple_models/. && \
+ mkdir qa/L0_simple_nodejs_client/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_simple_nodejs_client/models/. && \
+ mkdir qa/L0_backend_release/simple_seq_models && \
+ cp -r /workspace/docs/examples/model_repository/simple_sequence qa/L0_backend_release/simple_seq_models/. && \
+ mkdir qa/L0_shared_memory/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_shared_memory/models/. && \
+ mkdir qa/L0_cuda_shared_memory/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_cuda_shared_memory/models/. && \
+ mkdir qa/L0_client_java/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_client_java/models && \
+ mkdir qa/L0_grpc/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_grpc/models && \
+ cp -r docs/examples/model_repository/simple_dyna_sequence qa/L0_grpc/models && \
+ cp -r docs/examples/model_repository/simple_int8 qa/L0_grpc/models && \
+ cp -r docs/examples/model_repository/simple_identity qa/L0_grpc/models && \
+ cp -r docs/examples/model_repository/simple_sequence qa/L0_grpc/models && \
+ cp -r docs/examples/model_repository/simple_string qa/L0_grpc/models && \
+ cp -r docs/examples/model_repository/inception_graphdef qa/L0_grpc/models && \
+ mkdir qa/L0_grpc_state_cleanup/models && \
+ cp -r /workspace/src/test/models/repeat_int32 qa/L0_grpc_state_cleanup/models/ && \
+ mkdir qa/L0_http/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_http/models && \
+ cp -r docs/examples/model_repository/simple_dyna_sequence qa/L0_http/models && \
+ cp -r docs/examples/model_repository/simple_identity qa/L0_http/models && \
+ cp -r docs/examples/model_repository/simple_sequence qa/L0_http/models && \
+ cp -r docs/examples/model_repository/simple_string qa/L0_http/models && \
+ cp -r docs/examples/model_repository/inception_graphdef qa/L0_http/models && \
+ mkdir qa/L0_https/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_https/models/. && \
+ mkdir qa/L0_secure_grpc/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_secure_grpc/models/. && \
+ cp bin/simple qa/L0_simple_lib/. && \
+ cp bin/memory_alloc qa/L0_io/. && \
+ cp bin/multi_server qa/L0_multi_server/. && \
+ cp bin/memory_test qa/L0_memory/. && \
+ cp bin/pinned_memory_manager_test qa/L0_memory/. && \
+ cp bin/repo_agent_test qa/L0_triton_repo_agent/. && \
+ cp lib/libtritonrepoagent_relocation.so qa/L0_triton_repo_agent/. && \
+ mkdir qa/L0_query/models/query/1 && \
+ cp tritonbuild/tritonserver/backends/query/libtriton_query.so qa/L0_query/models/query/1/. && \
+ cp bin/query_test qa/L0_query/. && \
+ mkdir qa/L0_iterative_sequence/models/iterative_sequence/1 && \
+ cp tritonbuild/tritonserver/backends/iterative_sequence/libtriton_iterative_sequence.so qa/L0_iterative_sequence/models/iterative_sequence/1/. && \
+ cp bin/register_api_test qa/L0_register/. && \
+ cp bin/async_work_queue_test qa/L0_async_work_queue/. && \
+ cp tritonbuild/tritonserver/backends/implicit_state/libtriton_implicit_state.so \
+ qa/L0_implicit_state/. && \
+ mkdir qa/L0_data_compression/models && \
+ cp -r docs/examples/model_repository/simple qa/L0_data_compression/models && \
+ cp bin/data_compressor_test qa/L0_data_compression/. && \
+ cp bin/metrics_api_test qa/L0_metrics/. && \
+ cp bin/response_cache_test qa/L0_response_cache/. && \
+ cp bin/request_cancellation_test qa/L0_request_cancellation/. && \
+ cp bin/triton_json_test qa/L0_json/. && \
+ cp bin/backend_output_detail_test qa/L0_backend_output_detail/. && \
+ cp -r deploy/mlflow-triton-plugin qa/L0_mlflow/.
+
+RUN mkdir -p qa/pkgs && \
+ cp python/triton*.whl qa/pkgs/. && \
+ cp -rf python/test/. qa/L0_python_api/.
+
+# caffe2plan will not exist if the build was done without TensorRT enabled
+RUN if [ -f bin/caffe2plan ]; then \
+ cp bin/caffe2plan qa/common/.; \
+ fi
-ARG BASE_IMAGE=tensorrtserver
-ARG BUILD_IMAGE=tensorrtserver_build
+RUN mkdir -p qa/L0_simple_ensemble/models/simple/1 && \
+ cp docs/examples/model_repository/simple/1/model.graphdef \
+ qa/L0_simple_ensemble/models/simple/1/. && \
+ mkdir -p qa/L0_simple_ensemble/models/simple/2 && \
+ cp docs/examples/model_repository/simple/1/model.graphdef \
+ qa/L0_simple_ensemble/models/simple/2/. && \
+ mkdir -p qa/L0_socket/models/simple/1 && \
+ cp docs/examples/model_repository/simple/1/model.graphdef \
+ qa/L0_socket/models/simple/1/.
+
+RUN mkdir -p qa/L0_backend_identity/models && \
+ cp -r src/test/models/identity_fp32 qa/L0_backend_identity/models/. && \
+ mkdir -p qa/L0_backend_identity/models/identity_fp32/1
+
+RUN mkdir -p qa/custom_models/custom_sequence_int32/1 && \
+ cp tritonbuild/tritonserver/backends/sequence/libtriton_sequence.so \
+ qa/custom_models/custom_sequence_int32/1/. && \
+ mkdir -p qa/custom_models/custom_dyna_sequence_int32/1 && \
+ cp tritonbuild/tritonserver/backends/dyna_sequence/libtriton_dyna_sequence.so \
+ qa/custom_models/custom_dyna_sequence_int32/1/.
+
+# L0_lifecycle needs a no-GPU build of the identity backend.
+RUN cd tritonbuild/identity && \
+ rm -rf install build && mkdir build && cd build && \
+ cmake -DTRITON_ENABLE_GPU=OFF \
+ -DCMAKE_INSTALL_PREFIX:PATH=/workspace/tritonbuild/identity/install \
+ -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
+ -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} \
+ -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} \
+ -DTRITON_THIRD_PARTY_REPO_TAG:STRING=${TRITON_THIRD_PARTY_REPO_TAG} \
+ -DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
+ make -j16 install
+
+# L0_backend_python tests require triton_shm_monitor
+RUN cd tritonbuild/python && \
+ rm -rf install build && mkdir build && cd build && \
+ cmake -DCMAKE_INSTALL_PREFIX:PATH=/workspace/tritonbuild/python/install \
+ -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} \
+ -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} \
+ -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} \
+ -DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} .. && \
+ make -j16 triton-shm-monitor install
+
+RUN cp tritonbuild/identity/install/backends/identity/libtriton_identity.so \
+ qa/L0_lifecycle/. && \
+ cp tritonbuild/python/install/backends/python/triton_shm_monitor*.so \
+ qa/common/. && \
+ mkdir -p qa/L0_perf_nomodel/custom_models/custom_zero_1_float32/1 && \
+ mkdir -p qa/L0_perf_pyclients/custom_models/custom_zero_1_int32/1 && \
+ mkdir -p qa/L0_infer_shm && \
+ cp -r qa/L0_infer/. qa/L0_infer_shm && \
+ mkdir -p qa/L0_infer_cudashm && \
+ cp -r qa/L0_infer/. qa/L0_infer_cudashm && \
+ mkdir -p qa/L0_infer_valgrind && \
+ cp -r qa/L0_infer/. qa/L0_infer_valgrind && \
+ mkdir -p qa/L0_trt_shape_tensors_shm && \
+ cp -r qa/L0_trt_shape_tensors/. qa/L0_trt_shape_tensors_shm && \
+ mkdir -p qa/L0_trt_shape_tensors_cudashm && \
+ cp -r qa/L0_trt_shape_tensors/. qa/L0_trt_shape_tensors_cudashm && \
+ mkdir -p qa/L0_batcher_shm && \
+ cp -r qa/L0_batcher/. qa/L0_batcher_shm && \
+ mkdir -p qa/L0_batcher_cudashm && \
+ cp -r qa/L0_batcher/. qa/L0_batcher_cudashm && \
+ mkdir -p qa/L0_batcher_valgrind && \
+ cp -r qa/L0_batcher/. qa/L0_batcher_valgrind && \
+ mkdir -p qa/L0_sequence_batcher_shm && \
+ cp -r qa/L0_sequence_batcher/. qa/L0_sequence_batcher_shm && \
+ mkdir -p qa/L0_sequence_batcher_cudashm && \
+ cp -r qa/L0_sequence_batcher/. qa/L0_sequence_batcher_cudashm && \
+ mkdir -p qa/L0_sequence_batcher_valgrind && \
+ cp -r qa/L0_sequence_batcher/. qa/L0_sequence_batcher_valgrind && \
+ mkdir -p qa/L0_perf_nomodel_shm && \
+ cp -r qa/L0_perf_nomodel/. qa/L0_perf_nomodel_shm && \
+ mkdir -p qa/L0_perf_nomodel_cudashm && \
+ cp -r qa/L0_perf_nomodel/. qa/L0_perf_nomodel_cudashm
+
+# L0_model_control_stress will not be present if GitLab tests are not available
+RUN if [ -d qa/L0_model_control_stress ]; then \
+ mkdir -p qa/L0_model_control_stress_valgrind && \
+ cp -r qa/L0_model_control_stress/. qa/L0_model_control_stress_valgrind && \
+ mkdir -p qa/L0_model_control_stress_valgrind_massif && \
+ cp -r qa/L0_model_control_stress/. qa/L0_model_control_stress_valgrind_massif; \
+ fi
+
+RUN mkdir -p qa/L0_decoupled/models/repeat_int32/1 && \
+ mkdir -p qa/L0_decoupled/models/square_int32/1 && \
+ mkdir -p qa/L0_decoupled/models/identity_int32/1 && \
+ mkdir -p qa/L0_decoupled/models/simple_repeat/1 && \
+ mkdir -p qa/L0_decoupled/models/fan_repeat/1 && \
+ mkdir -p qa/L0_decoupled/models/sequence_repeat/1 && \
+ mkdir -p qa/L0_decoupled/models/repeat_square/1 && \
+ mkdir -p qa/L0_decoupled/models/nested_square/1 && \
+ mkdir -p qa/L0_grpc_state_cleanup/models/repeat_int32/1
+
+RUN if [ "$IGPU_BUILD" == "0" ]; then \
+ cp backends/repeat/libtriton_repeat.so qa/L0_model_config && \
+ cp backends/repeat/libtriton_repeat.so qa/L0_decoupled/models/repeat_int32/1 && \
+ cp backends/repeat/libtriton_repeat.so qa/L0_grpc_state_cleanup/models/repeat_int32/1/. && \
+ cp backends/square/libtriton_square.so qa/L0_decoupled/models/square_int32/1; \
+ fi
+
+RUN cp -r qa/L0_decoupled/models qa/L0_decoupled/python_models/ && \
+ cp /workspace/tritonbuild/python/examples/decoupled/repeat_model.py \
+ qa/L0_decoupled/python_models/repeat_int32/1/. && \
+ cp /workspace/tritonbuild/python/examples/decoupled/repeat_config.pbtxt \
+ qa/L0_decoupled/python_models/repeat_int32/. && \
+ cp /workspace/tritonbuild/python/examples/decoupled/square_model.py \
+ qa/L0_decoupled/python_models/square_int32/1/. && \
+ cp /workspace/tritonbuild/python/examples/decoupled/square_config.pbtxt \
+ qa/L0_decoupled/python_models/square_int32/.
+
+RUN mkdir -p qa/L0_repoagent_checksum/models/identity_int32/1 && \
+ cp tritonbuild/identity/install/backends/identity/libtriton_identity.so \
+ qa/L0_repoagent_checksum/models/identity_int32/1/.
+RUN mkdir -p qa/L0_passive_instance/models/distributed_int32_int32_int32/1 && \
+ cp tritonbuild/tritonserver/backends/distributed_addsub/libtriton_distributed_addsub.so \
+ qa/L0_passive_instance/models/distributed_int32_int32_int32/1/.
############################################################################
-## Build necessary artifacts needed for CI and initialize the qa/ directory.
+## Copy artifacts from sdk container
############################################################################
-FROM ${BUILD_IMAGE} AS trtserver_qa
+FROM ${SDK_IMAGE} AS sdk
+ARG TARGETPLATFORM
WORKDIR /workspace
+COPY --from=cibase /workspace/qa/ qa/
RUN mkdir -p qa/clients && mkdir -p qa/pkgs && \
- cp src/clients/python/grpc_image_client.py qa/clients/. && \
- cp src/clients/python/image_client.py qa/clients/. && \
- cp src/clients/python/simple_client.py qa/clients/. && \
- cp /opt/tensorrtserver/bin/image_client qa/clients/. && \
- cp /opt/tensorrtserver/bin/perf_client qa/clients/. && \
- cp /opt/tensorrtserver/bin/simple_client qa/clients/. && \
- cp /opt/tensorrtserver/bin/caffe2plan qa/common/. && \
- cp /opt/tensorrtserver/pip/tensorrtserver*.whl qa/pkgs/. && \
- mkdir qa/L0_simple_example/models && \
- cp -r docs/examples/model_repository/simple qa/L0_simple_example/models/.
+ cp -a install/bin/* qa/clients/. && \
+ cp install/lib/libgrpcclient.so qa/clients/. && \
+ cp install/lib/libhttpclient.so qa/clients/. && \
+ cp install/python/*.py qa/clients/. && \
+ cp install/python/triton*.whl qa/pkgs/. && \
+ cp install/java/examples/*.jar qa/clients/.
+RUN cp client/src/grpc_generated/go/*.go qa/L0_simple_go_client/. && \
+ cp client/src/grpc_generated/javascript/*.js qa/L0_simple_nodejs_client/. && \
+ cp client/src/grpc_generated/javascript/*.json qa/L0_simple_nodejs_client/. && \
+ cp -r client/src/grpc_generated/java qa/L0_client_java/.
############################################################################
## Create CI enabled image
############################################################################
FROM $BASE_IMAGE
-ARG PYVER=3.5
+ARG TARGETPLATFORM
+
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install platform-specific packages
+RUN if [ $(cat /etc/os-release | grep 'VERSION_ID="20.04"' | wc -l) -ne 0 ]; then \
+ apt-get update && \
+ apt-get install -y --no-install-recommends \
+ libpng-dev; \
+ elif [ $(cat /etc/os-release | grep 'VERSION_ID="22.04"' | wc -l) -ne 0 ]; then \
+ apt-get update && \
+ apt-get install -y --no-install-recommends \
+ libpng-dev; \
+ elif [ $(cat /etc/os-release | grep 'VERSION_ID="18.04"' | wc -l) -ne 0 ]; then \
+ apt-get update && \
+ apt-get install -y --no-install-recommends \
+ libpng-dev; \
+ else \
+ echo "Ubuntu version must be either 18.04, 20.04 or 22.04" && \
+ exit 1; \
+ fi
+# CI/QA for memcheck requires valgrind
+# libarchive-dev is required by the Python backend
RUN apt-get update && apt-get install -y --no-install-recommends \
- jmeter \
- jmeter-http \
- libcurl3 \
+ curl \
+ gdb \
libopencv-dev \
+ libarchive-dev \
libopencv-core-dev \
- libpng12-dev \
libzmq3-dev \
- python$PYVER \
- python$PYVER-dev \
- python$PYVER-numpy \
- python`echo $PYVER | cut -c1-1`-pil \
- python-protobuf \
- swig && \
+ maven \
+ openjdk-11-jdk \
+ nginx \
+ npm \
+ protobuf-compiler \
+ python3-dev \
+ python3-pip \
+ python3-protobuf \
+ python3-setuptools \
+ swig \
+ valgrind && \
rm -rf /var/lib/apt/lists/*
-# Use the PYVER version of python
+# CI/QA expects "python" executable (not python3).
RUN rm -f /usr/bin/python && \
- rm -f /usr/bin/python`echo $PYVER | cut -c1-1` && \
- ln -s /usr/bin/python$PYVER /usr/bin/python && \
- ln -s /usr/bin/python$PYVER /usr/bin/python`echo $PYVER | cut -c1-1`
+ ln -s /usr/bin/python3 /usr/bin/python
-RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
- python$PYVER get-pip.py && \
- rm get-pip.py
-RUN pip install --upgrade numpy future grpcio
+RUN pip3 install --upgrade wheel setuptools && \
+ pip3 install --upgrade numpy pillow attrdict future grpcio requests gsutil \
+ awscli six grpcio-channelz prettytable virtualenv \
+ check-jsonschema
-# CI expects tests in /opt/tensorrtserver/qa
-WORKDIR /opt/tensorrtserver
-COPY --from=trtserver_qa /workspace/qa/ qa/
+# Go is needed for the example Go client test.
+RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+ wget https://golang.org/dl/go1.19.1.linux-arm64.tar.gz && \
+ rm -rf /usr/local/go && tar -C /usr/local -xzf go1.19.1.linux-arm64.tar.gz && \
+ rm -f go1.19.1.linux-arm64.tar.gz; \
+ else \
+ wget https://golang.org/dl/go1.19.1.linux-amd64.tar.gz && \
+ rm -rf /usr/local/go && tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz && \
+ rm -f go1.19.1.linux-amd64.tar.gz; \
+ fi
+ENV GOPATH /root/go
+ENV PATH $PATH:/usr/local/go/bin:$GOPATH/bin
+RUN GO111MODULE=off go get github.com/golang/protobuf/protoc-gen-go && \
+ GO111MODULE=off go get google.golang.org/grpc
+
+# CI expects tests in /opt/tritonserver/qa. The triton-server (1000)
+# user should own all artifacts in case CI is run as the triton-server
+# user.
+WORKDIR /opt/tritonserver
+COPY --chown=1000:1000 --from=sdk /workspace/qa/ qa/
# Remove CI tests that are meant to run only on build image and
-# install the tensorrtserver python client APIs.
-RUN rm -fr qa/L0_copyrights qa/L0_unit_test qa/L1_tfs_unit_test && \
- pip install --upgrade qa/pkgs/tensorrtserver-*.whl
+# install the tritonserver/triton python client APIs.
+RUN rm -fr qa/L0_copyrights qa/L0_build_variants && \
+ find qa/pkgs/ -maxdepth 1 -type f -name \
+ "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
+ xargs pip3 install --upgrade
+
+# Install Triton Python API
+RUN find qa/pkgs/ -maxdepth 1 -type f -name \
+ "tritonserver-*.whl" | xargs -I {} pip3 install --upgrade {}[all]
+
+ENV LD_LIBRARY_PATH /opt/tritonserver/qa/clients:${LD_LIBRARY_PATH}
+
+# DLIS-3631: Needed to run Perf Analyzer CI tests correctly
+ENV LD_LIBRARY_PATH /opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}
-ENV PYVER ${PYVER}
+# Required for PyTorch to pick up the correct HPCX libraries
+ENV LD_LIBRARY_PATH /opt/hpcx/ucc/lib/:/opt/hpcx/ucx/lib/:${LD_LIBRARY_PATH}
diff --git a/Dockerfile.sdk b/Dockerfile.sdk
new file mode 100644
index 0000000000..7ae8cf0ee8
--- /dev/null
+++ b/Dockerfile.sdk
@@ -0,0 +1,256 @@
+# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#
+# Multistage build.
+#
+
+# Base the image on the minimal Triton container
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.03-py3-min
+
+ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
+ARG TRITON_COMMON_REPO_TAG=main
+ARG TRITON_CORE_REPO_TAG=main
+ARG TRITON_THIRD_PARTY_REPO_TAG=main
+ARG TRITON_MODEL_ANALYZER_REPO_TAG=main
+ARG TRITON_ENABLE_GPU=ON
+ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
+ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
+
+# DCGM version to install for Model Analyzer
+ARG DCGM_VERSION=3.2.6
+
+ARG NVIDIA_TRITON_SERVER_SDK_VERSION=unknown
+ARG NVIDIA_BUILD_ID=unknown
+
+############################################################################
+## Build image
+############################################################################
+
+FROM ${BASE_IMAGE} AS sdk_build
+
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends \
+ ca-certificates \
+ software-properties-common \
+ autoconf \
+ automake \
+ build-essential \
+ curl \
+ git \
+ gperf \
+ libb64-dev \
+ libgoogle-perftools-dev \
+ libopencv-dev \
+ libopencv-core-dev \
+ libssl-dev \
+ libtool \
+ pkg-config \
+ python3 \
+ python3-pip \
+ python3-dev \
+ rapidjson-dev \
+ vim \
+ wget \
+ python3-pdfkit \
+ openjdk-11-jdk \
+ maven && \
+ pip3 install --upgrade wheel setuptools && \
+ pip3 install --upgrade grpcio-tools && \
+ pip3 install --upgrade pip
+
+# The client build requires a recent version of CMake (FetchContent support is required)
+# Using the CMake installation instructions from: https://apt.kitware.com/
+RUN apt update -q=2 \
+ && apt install -y gpg wget \
+ && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \
+ && . /etc/os-release \
+ && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \
+ && apt-get update -q=2 \
+ && apt-get install -y --no-install-recommends cmake=3.27.7* cmake-data=3.27.7* \
+ && cmake --version
+
+# Build expects "python" executable (not python3).
+RUN rm -f /usr/bin/python && \
+ ln -s /usr/bin/python3 /usr/bin/python
+
+# Build the client library and examples
+ARG TRITON_REPO_ORGANIZATION
+ARG TRITON_CLIENT_REPO_SUBDIR
+ARG TRITON_COMMON_REPO_TAG
+ARG TRITON_CORE_REPO_TAG
+ARG TRITON_THIRD_PARTY_REPO_TAG
+ARG TRITON_ENABLE_GPU
+ARG JAVA_BINDINGS_MAVEN_VERSION
+ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG
+ARG TARGETPLATFORM
+
+WORKDIR /workspace
+COPY TRITON_VERSION .
+COPY ${TRITON_CLIENT_REPO_SUBDIR} client
+
+WORKDIR /workspace/build
+RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
+ -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
+ -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
+ -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
+ -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
+ -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
+ -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
+ -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
+ -DTRITON_ENABLE_JAVA_HTTP=ON \
+ -DTRITON_ENABLE_PERF_ANALYZER=ON \
+ -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \
+ -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \
+ -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \
+ -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \
+ -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
+ -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
+RUN make -j16 cc-clients python-clients java-clients && \
+ rm -fr ~/.m2
+
+# Install Java API Bindings
+RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
+ source /workspace/client/src/java-api-bindings/scripts/install_dependencies_and_build.sh \
+ --maven-version ${JAVA_BINDINGS_MAVEN_VERSION} \
+ --core-tag ${TRITON_CORE_REPO_TAG} \
+ --javacpp-tag ${JAVA_BINDINGS_JAVACPP_PRESETS_TAG} \
+ --jar-install-path /workspace/install/java-api-bindings; \
+ fi
+
+RUN pip3 install build \
+ && cd /workspace/client/src/c++/perf_analyzer/genai-perf \
+ && python3 -m build --wheel --outdir /workspace/install/python
+############################################################################
+## Create sdk container
+############################################################################
+FROM ${BASE_IMAGE}
+
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+
+ARG DCGM_VERSION
+ARG TRITON_REPO_ORGANIZATION
+ARG TRITON_CORE_REPO_TAG
+ARG TARGETPLATFORM
+ARG TRITON_ENABLE_GPU
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends \
+ software-properties-common \
+ curl \
+ git \
+ gperf \
+ libb64-dev \
+ libgoogle-perftools-dev \
+ libopencv-dev \
+ libopencv-core-dev \
+ libssl-dev \
+ libtool \
+ python3 \
+ python3-pip \
+ python3-dev \
+ vim \
+ wget \
+ python3-pdfkit \
+ maven \
+ default-jdk && \
+ pip3 install --upgrade wheel setuptools && \
+ pip3 install --upgrade grpcio-tools && \
+ pip3 install --upgrade pip
+
+WORKDIR /workspace
+COPY TRITON_VERSION .
+COPY NVIDIA_Deep_Learning_Container_License.pdf .
+COPY --from=sdk_build /workspace/client/ client/
+COPY --from=sdk_build /workspace/install/ install/
+RUN cd install && \
+ export VERSION=`cat /workspace/TRITON_VERSION` && \
+ tar zcf /workspace/v$VERSION.clients.tar.gz *
+
+# For CI testing we need to copy over the L0_sdk and L0_client_build_variants tests.
+RUN mkdir qa
+COPY qa/L0_sdk qa/L0_sdk
+COPY qa/L0_client_build_variants qa/L0_client_build_variants
+
+# Create a directory for all the Python client tests to enable unit testing
+RUN mkdir -p qa/python_client_unit_tests/
+COPY --from=sdk_build /workspace/client/src/python/library/tests/* qa/python_client_unit_tests/
+
+# Copy an image needed by the quickstart and other documentation.
+COPY qa/images/mug.jpg images/mug.jpg
+
+# Install the dependencies needed to run the client examples. These
+# are not needed for building but including them allows this image to
+# be used to run the client examples.
+RUN pip3 install --upgrade numpy pillow attrdict && \
+ find install/python/ -maxdepth 1 -type f -name \
+ "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
+ xargs pip3 install --upgrade
+
+RUN pip3 install install/python/genai_perf-*.whl
+
+# Install DCGM
+RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \
+ [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" && \
+ curl -o /tmp/cuda-keyring.deb \
+ https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/$arch/cuda-keyring_1.0-1_all.deb \
+ && apt install /tmp/cuda-keyring.deb && rm /tmp/cuda-keyring.deb && \
+ apt-get update && apt-get install -y datacenter-gpu-manager=1:${DCGM_VERSION}; \
+ fi
+
+# Build expects "python" executable (not python3).
+RUN rm -f /usr/bin/python && \
+ ln -s /usr/bin/python3 /usr/bin/python
+
+# Install Model Analyzer
+ARG TRITON_MODEL_ANALYZER_REPO_TAG
+ARG TRITON_MODEL_ANALYZER_REPO="${TRITON_REPO_ORGANIZATION}/model_analyzer@${TRITON_MODEL_ANALYZER_REPO_TAG}"
+RUN pip3 install "git+${TRITON_MODEL_ANALYZER_REPO}"
+
+# Entrypoint Banner
+ENV NVIDIA_PRODUCT_NAME="Triton Server SDK"
+COPY docker/entrypoint.d/ /opt/nvidia/entrypoint.d/
+RUN sed 's/Server/Server SDK/' /opt/nvidia/entrypoint.d/10-banner.txt | \
+ sed 's/^===/=======/' > /opt/nvidia/entrypoint.d/10-banner.new && \
+ mv /opt/nvidia/entrypoint.d/10-banner.new /opt/nvidia/entrypoint.d/10-banner.txt
+
+ARG NVIDIA_TRITON_SERVER_SDK_VERSION
+ARG NVIDIA_BUILD_ID
+ENV NVIDIA_TRITON_SERVER_SDK_VERSION=${NVIDIA_TRITON_SERVER_SDK_VERSION}
+ENV NVIDIA_BUILD_ID=${NVIDIA_BUILD_ID}
+
+ENV PATH /workspace/install/bin:${PATH}
+ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}
+
+# DLIS-3631: Needed to run Perf Analyzer CI tests correctly
+ENV LD_LIBRARY_PATH /opt/hpcx/ompi/lib:${LD_LIBRARY_PATH}
+
+# Set TCMALLOC_RELEASE_RATE for users setting LD_PRELOAD with tcmalloc
+ENV TCMALLOC_RELEASE_RATE 200
diff --git a/Dockerfile.win10.min b/Dockerfile.win10.min
new file mode 100644
index 0000000000..107b2e8ac0
--- /dev/null
+++ b/Dockerfile.win10.min
@@ -0,0 +1,208 @@
+# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Windows min container for Triton build
+
+ARG BASE_IMAGE=mcr.microsoft.com/windows:10.0.19042.1889
+
+FROM ${BASE_IMAGE} as dependency_base
+
+RUN powershell.exe Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope LocalMachine
+RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.ServicePointManager]::SecurityProtocol=[Net.SecurityProtocolType]::Tls,[Net.SecurityProtocolType]::Tls11,[Net.SecurityProtocolType]::Tls12,[Net.SecurityProtocolType]::Ssl3;Invoke-Expression( New-Object System.Net.WebClient ).DownloadString('https://chocolatey.org/install.ps1')
+RUN choco install unzip -y
+
+#
+# Installing TensorRT
+#
+ARG TENSORRT_VERSION
+ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.0.zip"
+ARG TENSORRT_SOURCE=${TENSORRT_ZIP}
+# COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
+ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}
+RUN unzip /tmp/%TENSORRT_ZIP%
+RUN move TensorRT-* TensorRT
+
+LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
+
+
+#
+# Installing cuDNN
+#
+ARG CUDNN_VERSION
+ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
+ARG CUDNN_SOURCE=${CUDNN_ZIP}
+ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}
+RUN unzip /tmp/%CUDNN_ZIP%
+RUN move cudnn-* cudnn
+
+LABEL CUDNN_VERSION="${CUDNN_VERSION}"
+
+
+FROM ${BASE_IMAGE} as build_base
+
+SHELL ["cmd", "/S", "/C"]
+
+RUN mkdir c:\tmp
+WORKDIR /tmp
+
+RUN powershell.exe Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope LocalMachine
+RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.ServicePointManager]::SecurityProtocol=[Net.SecurityProtocolType]::Tls,[Net.SecurityProtocolType]::Tls11,[Net.SecurityProtocolType]::Tls12,[Net.SecurityProtocolType]::Ssl3;Invoke-Expression( New-Object System.Net.WebClient ).DownloadString('https://chocolatey.org/install.ps1')
+RUN choco install git docker unzip -y
+
+#
+# Installing python
+#
+ARG PYTHON_VERSION=3.8.10
+ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe
+ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe
+RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%"
+RUN mklink "C:\python%PYTHON_VERSION%\python3.exe" "C:\python%PYTHON_VERSION%\python.exe"
+RUN pip install --upgrade wheel setuptools docker
+RUN pip install grpcio-tools psutil
+
+LABEL PYTHON_VERSION=${PYTHON_VERSION}
+
+#
+# Installing CMake
+#
+ARG CMAKE_VERSION=3.27.1
+ARG CMAKE_FILE=cmake-${CMAKE_VERSION}-windows-x86_64
+ARG CMAKE_SOURCE=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_FILE}.zip
+
+ADD ${CMAKE_SOURCE} ${CMAKE_FILE}.zip
+RUN unzip %CMAKE_FILE%.zip
+RUN move %CMAKE_FILE% "c:\CMake"
+RUN setx PATH "c:\CMake\bin;%PATH%"
+
+ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
+ENV VCPKG_TARGET_TRIPLET x64-windows
+
+LABEL CMAKE_VERSION=${CMAKE_VERSION}
+
+# Be aware that pip can interact badly with the VS cmd shell, so we need to pip install
+# before vsdevcmd.bat (see https://bugs.python.org/issue38989)
+
+
+#
+# Installing Visual Studio BuildTools: VS17 2022
+#
+ARG BUILDTOOLS_VERSION
+# Download collect.exe in case of an install failure.
+ADD https://aka.ms/vscollect.exe "C:\tmp\collect.exe"
+
+# Use the latest release channel. For more control, specify the location of an internal layout.
+ARG CHANNEL_URL=https://aka.ms/vs/17/release/channel
+ADD ${CHANNEL_URL} "C:\tmp\VisualStudio.chman"
+# Download the Build Tools bootstrapper.
+ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe
+ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe
+# Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended components.
+ARG VS_INSTALL_PATH_WP="C:\BuildTools"
+RUN vs_buildtools.exe --quiet --wait --norestart --nocache install \
+ --installPath %VS_INSTALL_PATH_WP% \
+ --channelUri "C:\tmp\VisualStudio.chman" \
+ --installChannelUri "C:\tmp\VisualStudio.chman" \
+ --add Microsoft.VisualStudio.Workload.VCTools \
+ --includeRecommended \
+ --locale "En-us"
+
+LABEL BUILDTOOLS_VERSION=${BUILDTOOLS_VERSION}
+
+WORKDIR /
+
+#
+# Installing Vcpkg
+#
+ARG VCPGK_VERSION=2023.11.20
+RUN git clone --single-branch --depth=1 -b %VCPGK_VERSION% https://github.com/microsoft/vcpkg.git
+WORKDIR /vcpkg
+RUN bootstrap-vcpkg.bat
+RUN vcpkg.exe update
+RUN vcpkg.exe install \
+ b64:x64-windows \
+ boost-interprocess:x64-windows \
+ boost-stacktrace:x64-windows \
+ openssl-windows:x64-windows \
+ openssl:x64-windows \
+ pthread:x64-windows \
+ rapidjson:x64-windows \
+ zlib:x64-windows
+RUN vcpkg.exe integrate install
+
+LABEL VCPGK_VERSION=${VCPGK_VERSION}
+
+WORKDIR /
+
+#
+# Installing CUDA
+#
+ARG CUDA_MAJOR=12
+ARG CUDA_MINOR=3
+ARG CUDA_PATCH=2
+ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
+ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
+ cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
+ nvml_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
+ cublas_${CUDA_MAJOR}.${CUDA_MINOR} cublas_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
+ cufft_${CUDA_MAJOR}.${CUDA_MINOR} cufft_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
+ curand_${CUDA_MAJOR}.${CUDA_MINOR} curand_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
+ cusolver_${CUDA_MAJOR}.${CUDA_MINOR} cusolver_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
+ cusparse_${CUDA_MAJOR}.${CUDA_MINOR} cusparse_dev_${CUDA_MAJOR}.${CUDA_MINOR} \
+ cupti_${CUDA_MAJOR}.${CUDA_MINOR} \
+ thrust_${CUDA_MAJOR}.${CUDA_MINOR} \
+ visual_studio_integration_${CUDA_MAJOR}.${CUDA_MINOR}"
+ARG CUDA_INSTALL_ROOT_WP="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v${CUDA_MAJOR}.${CUDA_MINOR}"
+
+ARG CUDA_SOURCE=https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/network_installers/cuda_${CUDA_VERSION}_windows_network.exe
+ADD ${CUDA_SOURCE} cuda_${CUDA_VERSION}_windows_network.exe
+
+RUN cuda_%CUDA_VERSION%_windows_network.exe -s %CUDA_PACKAGES%
+# Copy the CUDA Visual Studio integration from where it was installed
+# into the appropriate place in BuildTools
+RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensions\*" "%VS_INSTALL_PATH_WP%\MSBuild\Microsoft\VC\v170\BuildCustomizations"
+
+RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"
+
+ARG CUDNN_VERSION
+ENV CUDNN_VERSION ${CUDNN_VERSION}
+COPY --from=dependency_base /cudnn /cudnn
+RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
+RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
+RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
+LABEL CUDNN_VERSION="${CUDNN_VERSION}"
+
+ARG TENSORRT_VERSION
+ENV TRT_VERSION ${TENSORRT_VERSION}
+COPY --from=dependency_base /TensorRT /TensorRT
+RUN setx PATH "c:\TensorRT\lib;%PATH%"
+LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
+
+LABEL CUDA_VERSION="${CUDA_VERSION}"
+# It is important that the entrypoint initializes the Visual Studio
+# environment, otherwise the build will fail. Also set
+# CMAKE_TOOLCHAIN_FILE and VCPKG_TARGET_TRIPLET so
+# that CMake can find the packages installed by vcpkg.
+ENTRYPOINT C:\BuildTools\VC\Auxiliary\Build\vcvars64.bat &&
diff --git a/LICENSE b/LICENSE
index 8d2301c1f9..5529809efc 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,25 +1,25 @@
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of NVIDIA CORPORATION nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/NVIDIA_Deep_Learning_Container_License.pdf b/NVIDIA_Deep_Learning_Container_License.pdf
new file mode 100644
index 0000000000..bfdce390f3
Binary files /dev/null and b/NVIDIA_Deep_Learning_Container_License.pdf differ
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..4783f8f1f7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,277 @@
+
+
+# Triton Inference Server
+
+📣 **Triton Meetup at the NVIDIA Headquarters on April 30th 3:00 - 6:30 pm**
+
+We are excited to announce that we will be hosting our Triton user meetup at
+the NVIDIA Headquarters on April 30th 3:00 - 6:30 pm. Join us for this
+exclusive event where you will learn about the newest Triton features, get a
+glimpse into the roadmap, and connect with fellow users and the NVIDIA Triton
+engineering and product teams. Seating is limited and registration confirmation
+is required to attend - please register [here](https://lu.ma/tl06fqc1) to join
+the meetup. We can’t wait to welcome you and share what’s next for the Triton
+Inference Server.
+
+---
+
+[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
+
+> [!WARNING]
+> ##### LATEST RELEASE
+> You are currently on the `main` branch which tracks under-development progress towards the next release.
+> The current release is version [2.44.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.03 container release on NVIDIA GPU Cloud (NGC).
+
+Triton Inference Server is an open source inference serving software that
+streamlines AI inferencing. Triton enables teams to deploy any AI model from
+multiple deep learning and machine learning frameworks, including TensorRT,
+TensorFlow, PyTorch, ONNX, OpenVINO, Python, RAPIDS FIL, and more. Triton
+Inference Server supports inference across cloud, data center, edge and embedded
+devices on NVIDIA GPUs, x86 and ARM CPU, or AWS Inferentia. Triton Inference
+Server delivers optimized performance for many query types, including real time,
+batched, ensembles and audio/video streaming. Triton Inference Server is part of
+[NVIDIA AI Enterprise](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/),
+a software platform that accelerates the data science pipeline and streamlines
+the development and deployment of production AI.
+
+Major features include:
+
+- [Supports multiple deep learning
+ frameworks](https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton)
+- [Supports multiple machine learning
+ frameworks](https://github.com/triton-inference-server/fil_backend)
+- [Concurrent model
+ execution](docs/user_guide/architecture.md#concurrent-model-execution)
+- [Dynamic batching](docs/user_guide/model_configuration.md#dynamic-batcher)
+- [Sequence batching](docs/user_guide/model_configuration.md#sequence-batcher) and
+ [implicit state management](docs/user_guide/architecture.md#implicit-state-management)
+ for stateful models
+- Provides [Backend API](https://github.com/triton-inference-server/backend) that
+ allows adding custom backends and pre/post processing operations
+- Supports writing custom backends in Python, a.k.a.
+ [Python-based backends.](https://github.com/triton-inference-server/backend/blob/main/docs/python_based_backends.md#python-based-backends)
+- Model pipelines using
+ [Ensembling](docs/user_guide/architecture.md#ensemble-models) or [Business
+ Logic Scripting
+ (BLS)](https://github.com/triton-inference-server/python_backend#business-logic-scripting)
+- [HTTP/REST and GRPC inference
+ protocols](docs/customization_guide/inference_protocols.md) based on the community
+ developed [KServe
+ protocol](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2)
+- A [C API](docs/customization_guide/inference_protocols.md#in-process-triton-server-api) and
+ [Java API](docs/customization_guide/inference_protocols.md#java-bindings-for-in-process-triton-server-api)
+ allow Triton to link directly into your application for edge and other in-process use cases
+- [Metrics](docs/user_guide/metrics.md) indicating GPU utilization, server
+ throughput, server latency, and more
+
+**New to Triton Inference Server?** Make use of
+[these tutorials](https://github.com/triton-inference-server/tutorials)
+to begin your Triton journey!
+
+Join the [Triton and TensorRT community](https://www.nvidia.com/en-us/deep-learning-ai/triton-tensorrt-newsletter/) and
+stay current on the latest product updates, bug fixes, content, best practices,
+and more. Need enterprise support? NVIDIA global support is available for Triton
+Inference Server with the
+[NVIDIA AI Enterprise software suite](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/).
+
+## Serve a Model in 3 Easy Steps
+
+```bash
+# Step 1: Create the example model repository
+git clone -b r24.03 https://github.com/triton-inference-server/server.git
+cd server/docs/examples
+./fetch_models.sh
+
+# Step 2: Launch triton from the NGC Triton container
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.03-py3 tritonserver --model-repository=/models
+
+# Step 3: Send an Inference Request
+# In a separate console, launch the image_client example from the NGC Triton SDK container
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.03-py3-sdk
+/workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
+
+# Inference should return the following
+Image '/workspace/images/mug.jpg':
+ 15.346230 (504) = COFFEE MUG
+ 13.224326 (968) = CUP
+ 10.422965 (505) = COFFEEPOT
+```
+Please read the [QuickStart](docs/getting_started/quickstart.md) guide for additional information
+regarding this example. The quickstart guide also contains an example of how to launch Triton on [CPU-only systems](docs/getting_started/quickstart.md#run-on-cpu-only-system). New to Triton and wondering where to get started? Watch the [Getting Started video](https://youtu.be/NQDtfSi5QF4).
+
+## Examples and Tutorials
+
+Check out [NVIDIA LaunchPad](https://www.nvidia.com/en-us/data-center/products/ai-enterprise-suite/trial/)
+for free access to a set of hands-on labs with Triton Inference Server hosted on
+NVIDIA infrastructure.
+
+Specific end-to-end examples for popular models, such as ResNet, BERT, and DLRM
+are located in the
+[NVIDIA Deep Learning Examples](https://github.com/NVIDIA/DeepLearningExamples)
+page on GitHub. The
+[NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-triton-inference-server)
+contains additional documentation, presentations, and examples.
+
+## Documentation
+
+### Build and Deploy
+
+The recommended way to build and use Triton Inference Server is with Docker
+images.
+
+- [Install Triton Inference Server with Docker containers](docs/customization_guide/build.md#building-with-docker) (*Recommended*)
+- [Install Triton Inference Server without Docker containers](docs/customization_guide/build.md#building-without-docker)
+- [Build a custom Triton Inference Server Docker container](docs/customization_guide/compose.md)
+- [Build Triton Inference Server from source](docs/customization_guide/build.md#building-on-unsupported-platforms)
+- [Build Triton Inference Server for Windows 10](docs/customization_guide/build.md#building-for-windows-10)
+- Examples for deploying Triton Inference Server with Kubernetes and Helm on [GCP](deploy/gcp/README.md),
+ [AWS](deploy/aws/README.md), and [NVIDIA FleetCommand](deploy/fleetcommand/README.md)
+- [Secure Deployment Considerations](docs/customization_guide/deploy.md)
+
+### Using Triton
+
+#### Preparing Models for Triton Inference Server
+
+The first step in using Triton to serve your models is to place one or
+more models into a [model repository](docs/user_guide/model_repository.md). Depending on
+the type of the model and on which Triton capabilities you want to enable
+for it, you may need to create a [model
+configuration](docs/user_guide/model_configuration.md) for the model; a minimal
+repository layout sketch follows the list below.
+
+- [Add custom operations to Triton if needed by your model](docs/user_guide/custom_operations.md)
+- Enable model pipelining with [Model Ensemble](docs/user_guide/architecture.md#ensemble-models)
+ and [Business Logic Scripting (BLS)](https://github.com/triton-inference-server/python_backend#business-logic-scripting)
+- Optimize your models by setting [scheduling and batching](docs/user_guide/architecture.md#models-and-schedulers)
+  parameters and [model instances](docs/user_guide/model_configuration.md#instance-groups).
+- Use the [Model Analyzer tool](https://github.com/triton-inference-server/model_analyzer)
+ to help optimize your model configuration with profiling
+- Learn how to [explicitly manage what models are available by loading and
+ unloading models](docs/user_guide/model_management.md)
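+
+The sketch below is a rough illustration of the repository layout described
+above; it creates a single-model repository on disk. The model name
+`my_model`, the `onnxruntime` backend, and the tensor names and shapes are
+placeholder assumptions, so substitute the values that match your model.
+
+```python
+from pathlib import Path
+
+# Layout: <repository>/<model-name>/<version>/<model file>, with a
+# config.pbtxt placed next to the version directories.
+version_dir = Path("model_repository/my_model/1")
+version_dir.mkdir(parents=True, exist_ok=True)
+# Copy your serialized model (e.g. model.onnx) into version_dir here.
+
+config = """
+name: "my_model"
+backend: "onnxruntime"
+max_batch_size: 8
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  }
+]
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP32
+    dims: [ 16 ]
+  }
+]
+"""
+(version_dir.parent / "config.pbtxt").write_text(config)
+```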
+
+#### Configure and Use Triton Inference Server
+
+- Read the [Quick Start Guide](docs/getting_started/quickstart.md) to run Triton Inference
+ Server on both GPU and CPU
+- Triton supports multiple execution engines, called
+ [backends](https://github.com/triton-inference-server/backend#where-can-i-find-all-the-backends-that-are-available-for-triton), including
+ [TensorRT](https://github.com/triton-inference-server/tensorrt_backend),
+ [TensorFlow](https://github.com/triton-inference-server/tensorflow_backend),
+ [PyTorch](https://github.com/triton-inference-server/pytorch_backend),
+ [ONNX](https://github.com/triton-inference-server/onnxruntime_backend),
+ [OpenVINO](https://github.com/triton-inference-server/openvino_backend),
+ [Python](https://github.com/triton-inference-server/python_backend), and more
+- Not all the above backends are supported on every platform supported by Triton.
+ Look at the
+ [Backend-Platform Support Matrix](https://github.com/triton-inference-server/backend/blob/main/docs/backend_platform_support_matrix.md)
+ to learn which backends are supported on your target platform.
+- Learn how to [optimize performance](docs/user_guide/optimization.md) using the
+ [Performance Analyzer](https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/README.md)
+ and
+ [Model Analyzer](https://github.com/triton-inference-server/model_analyzer)
+- Learn how to [manage loading and unloading models](docs/user_guide/model_management.md) in
+  Triton (a short client sketch follows this list)
+- Send requests directly to Triton with the [HTTP/REST JSON-based
+ or gRPC protocols](docs/customization_guide/inference_protocols.md#httprest-and-grpc-protocols)
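+
+As a minimal sketch of the model management workflow referenced above, the
+Python HTTP client can load and unload models explicitly. This assumes Triton
+was started with `--model-control-mode=explicit`; the model name `my_model`
+is a placeholder.
+
+```python
+import tritonclient.http as httpclient
+
+# Assumes tritonserver was started with --model-control-mode=explicit.
+client = httpclient.InferenceServerClient(url="localhost:8000")
+
+client.load_model("my_model")             # "my_model" is a placeholder name
+print(client.is_model_ready("my_model"))  # True once the model has loaded
+client.unload_model("my_model")
+```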
+
+#### Client Support and Examples
+
+A Triton *client* application sends inference and other requests to Triton. The
+[Python and C++ client libraries](https://github.com/triton-inference-server/client)
+provide APIs to simplify this communication.
+
+- Review client examples for [C++](https://github.com/triton-inference-server/client/blob/main/src/c%2B%2B/examples),
+ [Python](https://github.com/triton-inference-server/client/blob/main/src/python/examples),
+ and [Java](https://github.com/triton-inference-server/client/blob/main/src/java/src/main/java/triton/client/examples)
+- Configure [HTTP](https://github.com/triton-inference-server/client#http-options)
+ and [gRPC](https://github.com/triton-inference-server/client#grpc-options)
+ client options
+- Send input data (e.g. a jpeg image) directly to Triton in the [body of an HTTP
+ request without any additional metadata](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_binary_data.md#raw-binary-request)
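+
+The sketch below is a minimal example of the Python HTTP client described
+above. The model name `my_model` and the tensor names, shapes, and datatypes
+are placeholders; substitute the values from your own model configuration.
+
+```python
+import numpy as np
+import tritonclient.http as httpclient
+
+# Connect to a local Triton instance (the HTTP endpoint defaults to port 8000).
+client = httpclient.InferenceServerClient(url="localhost:8000")
+
+# Describe and fill the input tensor; names and shapes are placeholders.
+input0 = httpclient.InferInput("INPUT0", [1, 16], "FP32")
+input0.set_data_from_numpy(np.random.rand(1, 16).astype(np.float32))
+
+# Run inference and read the output back as a numpy array.
+response = client.infer(model_name="my_model", inputs=[input0])
+print(response.as_numpy("OUTPUT0"))
+```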
+
+### Extend Triton
+
+[Triton Inference Server's architecture](docs/user_guide/architecture.md) is specifically
+designed for modularity and flexibility.
+
+- [Customize Triton Inference Server container](docs/customization_guide/compose.md) for your use case
+- [Create custom backends](https://github.com/triton-inference-server/backend)
+  in either [C/C++](https://github.com/triton-inference-server/backend/blob/main/README.md#triton-backend-api)
+  or [Python](https://github.com/triton-inference-server/python_backend) (a minimal Python sketch follows this list)
+- Create [decoupled backends and models](docs/user_guide/decoupled_models.md) that can send
+  multiple responses for a request, or no responses at all
+- Use a [Triton repository agent](docs/customization_guide/repository_agents.md) to add functionality
+ that operates when a model is loaded and unloaded, such as authentication,
+ decryption, or conversion
+- Deploy Triton on [Jetson and JetPack](docs/user_guide/jetson.md)
+- [Use Triton on AWS
+ Inferentia](https://github.com/triton-inference-server/python_backend/tree/main/inferentia)
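+
+As a minimal sketch of the Python backend option referenced above, a
+Python-based model provides a `model.py` like the one below. The
+`triton_python_backend_utils` module is only available inside the Triton
+Python backend runtime, and the tensor names are placeholders that must match
+the model's configuration.
+
+```python
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+    """Toy model that doubles INPUT0 into OUTPUT0 (placeholder names)."""
+
+    def execute(self, requests):
+        responses = []
+        for request in requests:
+            # Read the input, compute the result, and wrap it in a response.
+            in0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
+            out0 = pb_utils.Tensor("OUTPUT0", in0.as_numpy() * 2)
+            responses.append(pb_utils.InferenceResponse(output_tensors=[out0]))
+        return responses
+```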
+
+### Additional Documentation
+
+- [FAQ](docs/user_guide/faq.md)
+- [User Guide](docs/README.md#user-guide)
+- [Customization Guide](docs/README.md#customization-guide)
+- [Release Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/index.html)
+- [GPU, Driver, and CUDA Support
+Matrix](https://docs.nvidia.com/deeplearning/dgx/support-matrix/index.html)
+
+## Contributing
+
+Contributions to Triton Inference Server are more than welcome. To
+contribute please review the [contribution
+guidelines](CONTRIBUTING.md). If you have a backend, client,
+example or similar contribution that is not modifying the core of
+Triton, then you should file a PR in the [contrib
+repo](https://github.com/triton-inference-server/contrib).
+
+## Reporting problems, asking questions
+
+We appreciate any feedback, questions or bug reporting regarding this project.
+When posting [issues in GitHub](https://github.com/triton-inference-server/server/issues),
+follow the process outlined in the [Stack Overflow document](https://stackoverflow.com/help/mcve).
+Ensure posted examples are:
+- minimal – use as little code as possible that still produces the
+ same problem
+- complete – provide all parts needed to reproduce the problem. Check
+  if you can strip external dependencies and still show the problem. The
+  less time we spend on reproducing problems, the more time we have to
+  fix them
+- verifiable – test the code you're about to provide to make sure it
+ reproduces the problem. Remove all other problems that are not
+ related to your request/question.
+
+For issues, please use the provided bug report and feature request templates.
+
+For questions, we recommend posting in our community
+[GitHub Discussions.](https://github.com/triton-inference-server/server/discussions)
+
+## For more information
+
+Please refer to the [NVIDIA Developer Triton page](https://developer.nvidia.com/nvidia-triton-inference-server)
+for more information.
diff --git a/README.rst b/README.rst
deleted file mode 100644
index b8a516266d..0000000000
--- a/README.rst
+++ /dev/null
@@ -1,113 +0,0 @@
-..
- # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions
- # are met:
- # * Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- # * Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- # * Neither the name of NVIDIA CORPORATION nor the names of its
- # contributors may be used to endorse or promote products derived
- # from this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-|License|
-
-NVIDIA TensorRT Inference Server
-================================
-
-
- **NOTE: You are currently on the master branch which tracks
- under-development progress towards the next release. The latest
- release of the TensorRT Inference Server is 0.8.0 beta and is
- available on branch** `r18.11
- `_.
-
-.. overview-begin-marker-do-not-remove
-
-The NVIDIA TensorRT Inference Server (TRTIS) provides a cloud
-inferencing solution optimized for NVIDIA GPUs. The server provides an
-inference service via an HTTP or gRPC endpoint, allowing remote
-clients to request inferencing for any model being managed by the
-server. TRTIS provides the following features:
-
-* `Multiple framework support `_. The server can manage any number and mix of
- models (limited by system disk and memory resources). Supports
- TensorRT, TensorFlow GraphDef, TensorFlow SavedModel and Caffe2
- NetDef model formats. Also supports TensorFlow-TensorRT integrated
- models.
-* Multi-GPU support. The server can distribute inferencing across all
- system GPUs.
-* `Concurrent model execution support `_. Multiple models (or multiple instances of the
- same model) can run simultaneously on the same GPU.
-* Batching support. For models that support batching, the server can
- accept requests for a batch of inputs and respond with the
- corresponding batch of outputs. The server also supports `dynamic
- batching `_ where individual inference requests are dynamically
- combined together to improve inference throughput. Dynamic batching
- is transparent to the client requesting inference.
-* `Model repositories `_ may reside on a locally accessible file system (e.g. NFS) or
- in Google Cloud Storage.
-* Readiness and liveness `health endpoints `_ suitable for any orchestration or deployment framework, such as Kubernetes.
-* `Metrics `_ indicating GPU utiliization, server throughput, and server
- latency.
-
-.. overview-end-marker-do-not-remove
-
-The current release of the TensorRT Inference Server is 0.8.0 beta and
-corresponds to the 18.11 release of the tensorrtserver container on
-`NVIDIA GPU Cloud (NGC) `_. The branch for
-this release is `r18.11
-`_. The
-User Guide, Developer Guide, and API Reference `documentation
-`_
-provide guidance on installing, building and running TRTIS.
-
-You can also view the documentation for the `master branch
-`_
-and for `earlier releases
-`_.
-
-Contributing
-------------
-
-Contributions to TensorRT Inference Server are more than welcome. To
-contribute make a pull request and follow the guidelines outlined in
-the `Contributing `_ document.
-
-Reporting problems, asking questions
-------------------------------------
-
-We appreciate any feedback, questions or bug reporting regarding this
-project. When help with code is needed, follow the process outlined in
-the Stack Overflow (https://stackoverflow.com/help/mcve)
-document. Ensure posted examples are:
-
-* minimal – use as little code as possible that still produces the
- same problem
-
-* complete – provide all parts needed to reproduce the problem. Check
- if you can strip external dependency and still show the problem. The
- less time we spend on reproducing problems the more time we have to
- fix it
-
-* verifiable – test the code you're about to provide to make sure it
- reproduces the problem. Remove all other problems that are not
- related to your request/question.
-
-.. |License| image:: https://img.shields.io/badge/License-BSD3-lightgrey.svg
- :target: https://opensource.org/licenses/BSD-3-Clause
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000000..7aa39f4e5d
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,44 @@
+
+
+# Report a Security Vulnerability
+
+To report a potential security vulnerability in any NVIDIA product, please use either:
+* This web form: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html), or
+* Send email to: [NVIDIA PSIRT](mailto:psirt@nvidia.com)
+
+**OEM Partners should contact their NVIDIA Customer Program Manager**
+
+If reporting a potential vulnerability via email, please encrypt it using NVIDIA’s public PGP key ([see PGP Key page](https://www.nvidia.com/en-us/security/pgp-key/)) and include the following information:
+1. Product/Driver name and version/branch that contains the vulnerability
+2. Type of vulnerability (code execution, denial of service, buffer overflow, etc.)
+3. Instructions to reproduce the vulnerability
+4. Proof-of-concept or exploit code
+5. Potential impact of the vulnerability, including how an attacker could exploit the vulnerability
+
+See https://www.nvidia.com/en-us/security/ for past NVIDIA Security Bulletins and Notices.
diff --git a/TRITON_VERSION b/TRITON_VERSION
new file mode 100644
index 0000000000..4cc09ac9dd
--- /dev/null
+++ b/TRITON_VERSION
@@ -0,0 +1 @@
+2.45.0dev
diff --git a/Triton-CCLA-v1.pdf b/Triton-CCLA-v1.pdf
new file mode 100644
index 0000000000..d08afc8183
Binary files /dev/null and b/Triton-CCLA-v1.pdf differ
diff --git a/VERSION b/VERSION
deleted file mode 100644
index 7382a313f5..0000000000
--- a/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-0.10.0dev
diff --git a/WORKSPACE b/WORKSPACE
deleted file mode 100644
index afb9d3217c..0000000000
--- a/WORKSPACE
+++ /dev/null
@@ -1,118 +0,0 @@
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-workspace(name = "inference_server")
-
-local_repository(
- name = "org_tensorflow",
- path = "/opt/tensorflow/",
-)
-
-local_repository(
- name = "tf_serving",
- path = __workspace_dir__ + "/serving/",
-)
-
-new_local_repository(
- name = "extern_lib",
- path = "/opt/tensorrtserver/lib",
- build_file_content = """
-cc_library(
- name = "libcaffe2",
- srcs = ["libcaffe2.so"],
- visibility = ["//visibility:public"],
-)
-cc_library(
- name = "libcaffe2_gpu",
- srcs = ["libcaffe2_gpu.so"],
- visibility = ["//visibility:public"],
-)
-cc_library(
- name = "libcaffe2_detectron_ops_gpu",
- srcs = ["libcaffe2_detectron_ops_gpu.so"],
- visibility = ["//visibility:public"],
-)
-cc_library(
- name = "libc10",
- srcs = ["libc10.so"],
- visibility = ["//visibility:public"],
-)
-cc_library(
- name = "libmkl_core",
- srcs = ["libmkl_core.so"],
- visibility = ["//visibility:public"],
-)
-cc_library(
- name = "libmkl_gnu_thread",
- srcs = ["libmkl_gnu_thread.so"],
- visibility = ["//visibility:public"],
-)
-cc_library(
- name = "libmkl_avx2",
- srcs = ["libmkl_avx2.so"],
- visibility = ["//visibility:public"],
-)
-cc_library(
- name = "libmkl_def",
- srcs = ["libmkl_def.so"],
- visibility = ["//visibility:public"],
-)
-cc_library(
- name = "libmkl_intel_lp64",
- srcs = ["libmkl_intel_lp64.so"],
- visibility = ["//visibility:public"],
-)
-""",
-)
-
-# Need prometheus for metrics
-http_archive(
- name = "prometheus",
- strip_prefix = "prometheus-cpp-0.5.0",
- urls = ["https://github.com/jupp0r/prometheus-cpp/archive/v0.5.0.tar.gz"],
-)
-load("@prometheus//:repositories.bzl", "load_civetweb")
-load_civetweb()
-
-# TensorFlow depends on "io_bazel_rules_closure" so we need this here.
-# Needs to be kept in sync with the same target in TensorFlow's WORKSPACE file.
-http_archive(
- name = "io_bazel_rules_closure",
- sha256 = "a38539c5b5c358548e75b44141b4ab637bba7c4dc02b46b1f62a96d6433f56ae",
- strip_prefix = "rules_closure-dbb96841cc0a5fb2664c37822803b06dab20c7d1",
- urls = [
- "https://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz",
- "https://github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz", # 2018-04-13
- ],
-)
-
-load('@tf_serving//tensorflow_serving:workspace.bzl', 'tf_serving_workspace')
-tf_serving_workspace()
-
-# Specify the minimum required bazel version.
-load("@org_tensorflow//tensorflow:version_check.bzl", "check_bazel_version_at_least")
-
-check_bazel_version_at_least("0.15.0")
diff --git a/build.py b/build.py
new file mode 100755
index 0000000000..fde2b4ed2b
--- /dev/null
+++ b/build.py
@@ -0,0 +1,2747 @@
+#!/usr/bin/env python3
+# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import argparse
+import importlib.util
+import multiprocessing
+import os
+import os.path
+import pathlib
+import platform
+import stat
+import subprocess
+import sys
+from inspect import getsourcefile
+
+import requests
+
+#
+# Build Triton Inference Server.
+#
+
+# By default build.py builds the Triton Docker image, but can also be
+# used to build without Docker. See docs/build.md and --help for more
+# information.
+#
+# The TRITON_VERSION file indicates the Triton version and
+# TRITON_VERSION_MAP is used to determine the corresponding container
+# version and upstream container version (upstream containers are
+# dependencies required by Triton). These versions may be overridden.
+
+# Map from Triton version to corresponding container and component versions.
+#
+# triton version ->
+# (triton container version,
+# upstream container version,
+# ORT version,
+# ORT OpenVINO version (use None to disable OpenVINO in ORT),
+# Standalone OpenVINO version,
+#    DCGM version,
+#    vLLM version
+#   )
+#
+# Currently the OpenVINO versions used in ORT and standalone must
+# match because of the way dlopen works when loading the backends. If
+# different versions are used, then one backend or the other will
+# incorrectly load the other version of the OpenVINO libraries.
+#
+TRITON_VERSION_MAP = {
+ "2.45.0dev": (
+ "24.04dev", # triton container
+ "24.03", # upstream container
+ "1.17.2", # ORT
+ "2023.3.0", # ORT OpenVINO
+ "2023.3.0", # Standalone OpenVINO
+ "3.2.6", # DCGM version
+ "0.4.0.post1", # vLLM version
+ )
+}
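+
+# The tuple above is indexed positionally throughout this script; for example,
+# TRITON_VERSION_MAP[FLAGS.version][2] selects the ORT version and
+# TRITON_VERSION_MAP[FLAGS.version][6] selects the vLLM version.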
+
+CORE_BACKENDS = ["ensemble"]
+
+FLAGS = None
+EXTRA_CORE_CMAKE_FLAGS = {}
+OVERRIDE_CORE_CMAKE_FLAGS = {}
+EXTRA_BACKEND_CMAKE_FLAGS = {}
+OVERRIDE_BACKEND_CMAKE_FLAGS = {}
+
+THIS_SCRIPT_DIR = os.path.dirname(os.path.abspath(getsourcefile(lambda: 0)))
+
+
+def log(msg, force=False):
+ if force or not FLAGS.quiet:
+ try:
+ print(msg, file=sys.stderr)
+ except Exception:
+ print("", file=sys.stderr)
+
+
+def log_verbose(msg):
+ if FLAGS.verbose:
+ log(msg, force=True)
+
+
+def fail(msg):
+ fail_if(True, msg)
+
+
+def fail_if(p, msg):
+ if p:
+ print("error: {}".format(msg), file=sys.stderr)
+ sys.exit(1)
+
+
+def target_platform():
+ if FLAGS.target_platform is not None:
+ return FLAGS.target_platform
+ return platform.system().lower()
+
+
+def target_machine():
+ if FLAGS.target_machine is not None:
+ return FLAGS.target_machine
+ return platform.machine().lower()
+
+
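+# container_versions() falls back to TRITON_VERSION_MAP when explicit versions
+# are not given; e.g. with the map above, container_versions("2.45.0dev", None, None)
+# resolves to ("24.04dev", "24.03").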
+def container_versions(version, container_version, upstream_container_version):
+ if container_version is None:
+ if version not in TRITON_VERSION_MAP:
+ fail("container version not known for {}".format(version))
+ container_version = TRITON_VERSION_MAP[version][0]
+ if upstream_container_version is None:
+ if version not in TRITON_VERSION_MAP:
+ fail("upstream container version not known for {}".format(version))
+ upstream_container_version = TRITON_VERSION_MAP[version][1]
+ return container_version, upstream_container_version
+
+
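+# BuildScript accumulates bash (or PowerShell on Windows) commands into a
+# script file and is normally used as a context manager, e.g.:
+#
+#   with BuildScript("/tmp/cmake_build", desc="example", verbose=True) as s:
+#       s.mkdir("/tmp/tritonbuild")
+#       s.cwd("/tmp/tritonbuild")
+#
+# The paths above are illustrative. On close() the generated script is made
+# executable.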
+class BuildScript:
+ """Utility class for writing build scripts"""
+
+ def __init__(self, filepath, desc=None, verbose=False):
+ self._filepath = filepath
+ self._file = open(self._filepath, "w")
+ self._verbose = verbose
+ self.header(desc)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, traceback):
+ self.close()
+
+ def __del__(self):
+ self.close()
+
+    def close(self):
+        """Close the script file and mark it executable."""
+ if self._file is not None:
+ if target_platform() == "windows":
+ self.blankln()
+ self._file.write("}\n")
+ self._file.write("catch {\n")
+ self._file.write(" $_;\n")
+ self._file.write(" ExitWithCode 1;\n")
+ self._file.write("}\n")
+ self._file.close()
+ self._file = None
+ st = os.stat(self._filepath)
+ os.chmod(self._filepath, st.st_mode | stat.S_IEXEC)
+
+ def blankln(self):
+ self._file.write("\n")
+
+ def commentln(self, cnt):
+ self._file.write("#" * cnt + "\n")
+
+ def comment(self, msg=""):
+ if not isinstance(msg, str):
+ try:
+ for m in msg:
+                    self._file.write(f"# {m}\n")
+ return
+ except TypeError:
+ pass
+ self._file.write(f"# {msg}\n")
+
+ def comment_verbose(self, msg=""):
+ if self._verbose:
+ self.comment(msg)
+
+ def header(self, desc=None):
+ if target_platform() != "windows":
+ self._file.write("#!/usr/bin/env bash\n\n")
+
+ if desc is not None:
+ self.comment()
+ self.comment(desc)
+ self.comment()
+ self.blankln()
+
+ self.comment("Exit script immediately if any command fails")
+ if target_platform() == "windows":
+ self._file.write("function ExitWithCode($exitcode) {\n")
+ self._file.write(" $host.SetShouldExit($exitcode)\n")
+ self._file.write(" exit $exitcode\n")
+ self._file.write("}\n")
+ self.blankln()
+ if self._verbose:
+ self._file.write("Set-PSDebug -Trace 1\n")
+ self.blankln()
+ self._file.write("try {\n")
+ else:
+ self._file.write("set -e\n")
+ if self._verbose:
+ self._file.write("set -x\n")
+ self.blankln()
+
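+    # envvar_ref() returns the platform-appropriate way to reference an
+    # environment variable in the generated script; e.g. envvar_ref("TRT_VERSION")
+    # yields "${env:TRT_VERSION}" for PowerShell and "${TRT_VERSION}" for bash.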
+ def envvar_ref(self, v):
+ if target_platform() == "windows":
+ return f"${{env:{v}}}"
+ return f"${{{v}}}"
+
+ def cmd(self, clist, check_exitcode=False):
+ if isinstance(clist, str):
+ self._file.write(f"{clist}\n")
+ else:
+ for c in clist:
+ self._file.write(f"{c} ")
+ self.blankln()
+
+ if check_exitcode:
+ if target_platform() == "windows":
+ self._file.write("if ($LASTEXITCODE -ne 0) {\n")
+ self._file.write(
+ ' Write-Output "exited with status code $LASTEXITCODE";\n'
+ )
+ self._file.write(" ExitWithCode 1;\n")
+ self._file.write("}\n")
+
+ def cwd(self, path):
+ if target_platform() == "windows":
+ self.cmd(f"Set-Location -EV Err -EA Stop {path}")
+ else:
+ self.cmd(f"cd {path}")
+
+ def cp(self, src, dest):
+ if target_platform() == "windows":
+ self.cmd(f"Copy-Item -EV Err -EA Stop {src} -Destination {dest}")
+ else:
+ self.cmd(f"cp {src} {dest}")
+
+ def mkdir(self, path):
+ if target_platform() == "windows":
+ self.cmd(
+ f"New-Item -EV Err -EA Stop -ItemType Directory -Force -Path {path}"
+ )
+ else:
+ self.cmd(f"mkdir -p {pathlib.Path(path)}")
+
+ def rmdir(self, path):
+ if target_platform() == "windows":
+ self.cmd(f"if (Test-Path -Path {path}) {{")
+ self.cmd(f" Remove-Item -EV Err -EA Stop -Recurse -Force {path}")
+ self.cmd("}")
+ else:
+ self.cmd(f"rm -fr {pathlib.Path(path)}")
+
+ def cpdir(self, src, dest):
+ if target_platform() == "windows":
+ self.cmd(f"Copy-Item -EV Err -EA Stop -Recurse {src} -Destination {dest}")
+ else:
+ self.cmd(f"cp -r {src} {dest}")
+
+ def tar(self, subdir, tar_filename):
+ if target_platform() == "windows":
+ fail("unsupported operation: tar")
+ else:
+ self.cmd(f"tar zcf {tar_filename} {subdir}")
+
+ def cmake(self, args):
+ # Pass some additional envvars into cmake...
+ env_args = []
+ for k in ("TRT_VERSION", "CMAKE_TOOLCHAIN_FILE", "VCPKG_TARGET_TRIPLET"):
+ env_args += [f'"-D{k}={self.envvar_ref(k)}"']
+ self.cmd(f'cmake {" ".join(env_args)} {" ".join(args)}', check_exitcode=True)
+
+ def makeinstall(self, target="install"):
+ verbose_flag = "-v" if self._verbose else ""
+ self.cmd(
+ f"cmake --build . --config {FLAGS.build_type} -j{FLAGS.build_parallel} {verbose_flag} -t {target}"
+ )
+
+ def gitclone(self, repo, tag, subdir, org):
+ clone_dir = subdir
+ if not FLAGS.no_force_clone:
+ self.rmdir(clone_dir)
+
+ if target_platform() == "windows":
+ self.cmd(f"if (-Not (Test-Path -Path {clone_dir})) {{")
+ else:
+ self.cmd(f"if [[ ! -e {clone_dir} ]]; then")
+
+ # FIXME [DLIS-4045 - Currently the tag starting with "pull/" is not
+ # working with "--repo-tag" as the option is not forwarded to the
+ # individual repo build correctly.]
+ # If 'tag' starts with "pull/" then it must be of form
+        # "pull/<pr>/head". We just clone at "main" and then fetch the
+ # reference onto a new branch we name "tritonbuildref".
+ if tag.startswith("pull/"):
+ self.cmd(
+ f" git clone --recursive --depth=1 {org}/{repo}.git {subdir};",
+ check_exitcode=True,
+ )
+ self.cmd("}" if target_platform() == "windows" else "fi")
+ self.cwd(subdir)
+ self.cmd(f"git fetch origin {tag}:tritonbuildref", check_exitcode=True)
+ self.cmd(f"git checkout tritonbuildref", check_exitcode=True)
+ else:
+ self.cmd(
+ f" git clone --recursive --single-branch --depth=1 -b {tag} {org}/{repo}.git {subdir};",
+ check_exitcode=True,
+ )
+ self.cmd("}" if target_platform() == "windows" else "fi")
+
+
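+# Helpers that render cmake -D arguments for the generated build scripts. For
+# example, cmake_core_arg("TRITON_VERSION", "STRING", "2.45.0dev") produces
+# '"-DTRITON_VERSION:STRING=2.45.0dev"' and cmake_core_enable("TRITON_ENABLE_GPU", True)
+# produces '"-DTRITON_ENABLE_GPU:BOOL=ON"', unless overridden from the command line.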
+def cmake_core_arg(name, type, value):
+ # Return cmake -D setting to set name=value for core build. Use
+ # command-line specified value if one is given.
+ if name in OVERRIDE_CORE_CMAKE_FLAGS:
+ value = OVERRIDE_CORE_CMAKE_FLAGS[name]
+ if type is None:
+ type = ""
+ else:
+ type = ":{}".format(type)
+ return '"-D{}{}={}"'.format(name, type, value)
+
+
+def cmake_core_enable(name, flag):
+ # Return cmake -D setting to set name=flag?ON:OFF for core
+ # build. Use command-line specified value for 'flag' if one is
+ # given.
+ if name in OVERRIDE_CORE_CMAKE_FLAGS:
+ value = OVERRIDE_CORE_CMAKE_FLAGS[name]
+ else:
+ value = "ON" if flag else "OFF"
+ return '"-D{}:BOOL={}"'.format(name, value)
+
+
+def cmake_core_extra_args():
+ args = []
+ for k, v in EXTRA_CORE_CMAKE_FLAGS.items():
+ args.append('"-D{}={}"'.format(k, v))
+ return args
+
+
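+# The backend-specific helpers below mirror the core helpers but consult the
+# per-backend override map first; e.g. a command-line override recorded in
+# OVERRIDE_BACKEND_CMAKE_FLAGS["onnxruntime"]["TRITON_BUILD_ONNXRUNTIME_VERSION"]
+# takes precedence over the value computed in onnxruntime_cmake_args().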
+def cmake_backend_arg(backend, name, type, value):
+ # Return cmake -D setting to set name=value for backend build. Use
+ # command-line specified value if one is given.
+ if backend in OVERRIDE_BACKEND_CMAKE_FLAGS:
+ if name in OVERRIDE_BACKEND_CMAKE_FLAGS[backend]:
+ value = OVERRIDE_BACKEND_CMAKE_FLAGS[backend][name]
+ if type is None:
+ type = ""
+ else:
+ type = ":{}".format(type)
+ return '"-D{}{}={}"'.format(name, type, value)
+
+
+def cmake_backend_enable(backend, name, flag):
+ # Return cmake -D setting to set name=flag?ON:OFF for backend
+ # build. Use command-line specified value for 'flag' if one is
+ # given.
+ value = None
+ if backend in OVERRIDE_BACKEND_CMAKE_FLAGS:
+ if name in OVERRIDE_BACKEND_CMAKE_FLAGS[backend]:
+ value = OVERRIDE_BACKEND_CMAKE_FLAGS[backend][name]
+ if value is None:
+ value = "ON" if flag else "OFF"
+ return '"-D{}:BOOL={}"'.format(name, value)
+
+
+def cmake_backend_extra_args(backend):
+ args = []
+ if backend in EXTRA_BACKEND_CMAKE_FLAGS:
+ for k, v in EXTRA_BACKEND_CMAKE_FLAGS[backend].items():
+ args.append('"-D{}={}"'.format(k, v))
+ return args
+
+
+def cmake_repoagent_arg(name, type, value):
+ # For now there is no override for repo-agents
+ if type is None:
+ type = ""
+ else:
+ type = ":{}".format(type)
+ return '"-D{}{}={}"'.format(name, type, value)
+
+
+def cmake_repoagent_enable(name, flag):
+ # For now there is no override for repo-agents
+ value = "ON" if flag else "OFF"
+ return '"-D{}:BOOL={}"'.format(name, value)
+
+
+def cmake_repoagent_extra_args():
+    # For now there are no extra args for repo-agents
+ args = []
+ return args
+
+
+def cmake_cache_arg(name, type, value):
+ # For now there is no override for caches
+ if type is None:
+ type = ""
+ else:
+ type = ":{}".format(type)
+ return '"-D{}{}={}"'.format(name, type, value)
+
+
+def cmake_cache_enable(name, flag):
+ # For now there is no override for caches
+ value = "ON" if flag else "OFF"
+ return '"-D{}:BOOL={}"'.format(name, value)
+
+
+def cmake_cache_extra_args():
+    # For now there are no extra args for caches
+ args = []
+ return args
+
+
+def core_cmake_args(components, backends, cmake_dir, install_dir):
+ cargs = [
+ cmake_core_arg("CMAKE_BUILD_TYPE", None, FLAGS.build_type),
+ cmake_core_arg("CMAKE_INSTALL_PREFIX", "PATH", install_dir),
+ cmake_core_arg("TRITON_VERSION", "STRING", FLAGS.version),
+ cmake_core_arg("TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization),
+ cmake_core_arg("TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
+ cmake_core_arg("TRITON_CORE_REPO_TAG", "STRING", components["core"]),
+ cmake_core_arg("TRITON_BACKEND_REPO_TAG", "STRING", components["backend"]),
+ cmake_core_arg(
+ "TRITON_THIRD_PARTY_REPO_TAG", "STRING", components["thirdparty"]
+ ),
+ ]
+
+ cargs.append(cmake_core_enable("TRITON_ENABLE_LOGGING", FLAGS.enable_logging))
+ cargs.append(cmake_core_enable("TRITON_ENABLE_STATS", FLAGS.enable_stats))
+ cargs.append(cmake_core_enable("TRITON_ENABLE_METRICS", FLAGS.enable_metrics))
+ cargs.append(
+ cmake_core_enable("TRITON_ENABLE_METRICS_GPU", FLAGS.enable_gpu_metrics)
+ )
+ cargs.append(
+ cmake_core_enable("TRITON_ENABLE_METRICS_CPU", FLAGS.enable_cpu_metrics)
+ )
+ cargs.append(cmake_core_enable("TRITON_ENABLE_TRACING", FLAGS.enable_tracing))
+ cargs.append(cmake_core_enable("TRITON_ENABLE_NVTX", FLAGS.enable_nvtx))
+
+ cargs.append(cmake_core_enable("TRITON_ENABLE_GPU", FLAGS.enable_gpu))
+ cargs.append(
+ cmake_core_arg(
+ "TRITON_MIN_COMPUTE_CAPABILITY", None, FLAGS.min_compute_capability
+ )
+ )
+
+ cargs.append(cmake_core_enable("TRITON_ENABLE_MALI_GPU", FLAGS.enable_mali_gpu))
+
+ cargs.append(cmake_core_enable("TRITON_ENABLE_GRPC", "grpc" in FLAGS.endpoint))
+ cargs.append(cmake_core_enable("TRITON_ENABLE_HTTP", "http" in FLAGS.endpoint))
+ cargs.append(
+ cmake_core_enable("TRITON_ENABLE_SAGEMAKER", "sagemaker" in FLAGS.endpoint)
+ )
+ cargs.append(
+ cmake_core_enable("TRITON_ENABLE_VERTEX_AI", "vertex-ai" in FLAGS.endpoint)
+ )
+
+ cargs.append(cmake_core_enable("TRITON_ENABLE_GCS", "gcs" in FLAGS.filesystem))
+ cargs.append(cmake_core_enable("TRITON_ENABLE_S3", "s3" in FLAGS.filesystem))
+ cargs.append(
+ cmake_core_enable(
+ "TRITON_ENABLE_AZURE_STORAGE", "azure_storage" in FLAGS.filesystem
+ )
+ )
+
+ cargs.append(cmake_core_enable("TRITON_ENABLE_ENSEMBLE", "ensemble" in backends))
+ cargs.append(cmake_core_enable("TRITON_ENABLE_TENSORRT", "tensorrt" in backends))
+
+ cargs += cmake_core_extra_args()
+ cargs.append(cmake_dir)
+ return cargs
+
+
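+# Map a repository agent name to its git repository name,
+# e.g. repoagent_repo("checksum") -> "checksum_repository_agent".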
+def repoagent_repo(ra):
+ return "{}_repository_agent".format(ra)
+
+
+def repoagent_cmake_args(images, components, ra, install_dir):
+ args = []
+
+ cargs = args + [
+ cmake_repoagent_arg("CMAKE_BUILD_TYPE", None, FLAGS.build_type),
+ cmake_repoagent_arg("CMAKE_INSTALL_PREFIX", "PATH", install_dir),
+ cmake_repoagent_arg(
+ "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization
+ ),
+ cmake_repoagent_arg("TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
+ cmake_repoagent_arg("TRITON_CORE_REPO_TAG", "STRING", components["core"]),
+ ]
+
+ cargs.append(cmake_repoagent_enable("TRITON_ENABLE_GPU", FLAGS.enable_gpu))
+ cargs += cmake_repoagent_extra_args()
+ cargs.append("..")
+ return cargs
+
+
+def cache_repo(cache):
+ # example: "local", or "redis"
+ return "{}_cache".format(cache)
+
+
+def cache_cmake_args(images, components, cache, install_dir):
+ args = []
+
+ cargs = args + [
+ cmake_cache_arg("CMAKE_BUILD_TYPE", None, FLAGS.build_type),
+ cmake_cache_arg("CMAKE_INSTALL_PREFIX", "PATH", install_dir),
+ cmake_cache_arg(
+ "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization
+ ),
+ cmake_cache_arg("TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
+ cmake_cache_arg("TRITON_CORE_REPO_TAG", "STRING", components["core"]),
+ ]
+
+ cargs.append(cmake_cache_enable("TRITON_ENABLE_GPU", FLAGS.enable_gpu))
+ cargs += cmake_cache_extra_args()
+ cargs.append("..")
+ return cargs
+
+
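+# Map a backend name to its git repository name,
+# e.g. backend_repo("onnxruntime") -> "onnxruntime_backend".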
+def backend_repo(be):
+ return "{}_backend".format(be)
+
+
+def backend_cmake_args(images, components, be, install_dir, library_paths):
+ cmake_build_type = FLAGS.build_type
+
+ if be == "onnxruntime":
+ args = onnxruntime_cmake_args(images, library_paths)
+ elif be == "openvino":
+ args = openvino_cmake_args()
+ elif be == "tensorflow":
+ args = tensorflow_cmake_args(images, library_paths)
+ elif be == "python":
+ args = []
+ elif be == "dali":
+ args = dali_cmake_args()
+ elif be == "pytorch":
+ args = pytorch_cmake_args(images)
+ elif be == "armnn_tflite":
+ args = armnn_tflite_cmake_args()
+ elif be == "fil":
+ args = fil_cmake_args(images)
+ # DLIS-4618: FIL backend fails debug build, so override it for now.
+ cmake_build_type = "Release"
+ elif be == "fastertransformer":
+ args = fastertransformer_cmake_args()
+ elif be == "tensorrt":
+ args = tensorrt_cmake_args()
+ elif be == "tensorrtllm":
+ args = tensorrtllm_cmake_args(images)
+ else:
+ args = []
+
+ cargs = args + [
+ cmake_backend_arg(be, "CMAKE_BUILD_TYPE", None, cmake_build_type),
+ cmake_backend_arg(be, "CMAKE_INSTALL_PREFIX", "PATH", install_dir),
+ cmake_backend_arg(
+ be, "TRITON_REPO_ORGANIZATION", "STRING", FLAGS.github_organization
+ ),
+ cmake_backend_arg(be, "TRITON_COMMON_REPO_TAG", "STRING", components["common"]),
+ cmake_backend_arg(be, "TRITON_CORE_REPO_TAG", "STRING", components["core"]),
+ cmake_backend_arg(
+ be, "TRITON_BACKEND_REPO_TAG", "STRING", components["backend"]
+ ),
+ ]
+
+ cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_GPU", FLAGS.enable_gpu))
+ cargs.append(
+ cmake_backend_enable(be, "TRITON_ENABLE_MALI_GPU", FLAGS.enable_mali_gpu)
+ )
+ cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_STATS", FLAGS.enable_stats))
+ cargs.append(
+ cmake_backend_enable(be, "TRITON_ENABLE_METRICS", FLAGS.enable_metrics)
+ )
+
+ # [DLIS-4950] always enable below once Windows image is updated with CUPTI
+ # cargs.append(cmake_backend_enable(be, 'TRITON_ENABLE_MEMORY_TRACKER', True))
+ if (target_platform() == "windows") and (not FLAGS.no_container_build):
+ print(
+            "Warning: Docker build detected for Windows; the 'device memory tracker' backend utility will be disabled because the required library is missing from the CUDA Windows docker image."
+ )
+ cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_MEMORY_TRACKER", False))
+ elif target_platform() == "igpu":
+ print(
+            "Warning: iGPU build detected; the 'device memory tracker' backend utility will be disabled because iGPU does not provide the required version of the library."
+ )
+ cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_MEMORY_TRACKER", False))
+ elif FLAGS.enable_gpu:
+ cargs.append(cmake_backend_enable(be, "TRITON_ENABLE_MEMORY_TRACKER", True))
+
+ cargs += cmake_backend_extra_args(be)
+ if be == "tensorrtllm":
+ cargs.append("-S ../inflight_batcher_llm -B .")
+ else:
+ cargs.append("..")
+ return cargs
+
+
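+# With the 24.03 upstream container mapped above, the default PyTorch backend
+# build image resolves to "nvcr.io/nvidia/pytorch:24.03-py3" unless a "pytorch"
+# entry is provided in the images map.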
+def pytorch_cmake_args(images):
+ if "pytorch" in images:
+ image = images["pytorch"]
+ else:
+ image = "nvcr.io/nvidia/pytorch:{}-py3".format(FLAGS.upstream_container_version)
+ cargs = [
+ cmake_backend_arg("pytorch", "TRITON_PYTORCH_DOCKER_IMAGE", None, image),
+ ]
+
+ if FLAGS.enable_gpu:
+ cargs.append(
+ cmake_backend_enable("pytorch", "TRITON_PYTORCH_ENABLE_TORCHTRT", True)
+ )
+ cargs.append(
+ cmake_backend_enable("pytorch", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx)
+ )
+ return cargs
+
+
+def onnxruntime_cmake_args(images, library_paths):
+ cargs = [
+ cmake_backend_arg(
+ "onnxruntime",
+ "TRITON_BUILD_ONNXRUNTIME_VERSION",
+ None,
+ TRITON_VERSION_MAP[FLAGS.version][2],
+ )
+ ]
+
+ # TRITON_ENABLE_GPU is already set for all backends in backend_cmake_args()
+ if FLAGS.enable_gpu:
+ cargs.append(
+ cmake_backend_enable(
+ "onnxruntime", "TRITON_ENABLE_ONNXRUNTIME_TENSORRT", True
+ )
+ )
+
+ if target_platform() == "windows":
+ if "base" in images:
+ cargs.append(
+ cmake_backend_arg(
+ "onnxruntime", "TRITON_BUILD_CONTAINER", None, images["base"]
+ )
+ )
+ else:
+ if "base" in images:
+ cargs.append(
+ cmake_backend_arg(
+ "onnxruntime", "TRITON_BUILD_CONTAINER", None, images["base"]
+ )
+ )
+ else:
+ cargs.append(
+ cmake_backend_arg(
+ "onnxruntime",
+ "TRITON_BUILD_CONTAINER_VERSION",
+ None,
+ TRITON_VERSION_MAP[FLAGS.version][1],
+ )
+ )
+
+ if (target_machine() != "aarch64") and (
+ TRITON_VERSION_MAP[FLAGS.version][3] is not None
+ ):
+ cargs.append(
+ cmake_backend_enable(
+ "onnxruntime", "TRITON_ENABLE_ONNXRUNTIME_OPENVINO", True
+ )
+ )
+ cargs.append(
+ cmake_backend_arg(
+ "onnxruntime",
+ "TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION",
+ None,
+ TRITON_VERSION_MAP[FLAGS.version][3],
+ )
+ )
+
+ if target_platform() == "igpu":
+ cargs.append(
+ cmake_backend_arg(
+ "onnxruntime",
+ "TRITON_BUILD_TARGET_PLATFORM",
+ None,
+ target_platform(),
+ )
+ )
+
+ return cargs
+
+
+def openvino_cmake_args():
+ cargs = [
+ cmake_backend_arg(
+ "openvino",
+ "TRITON_BUILD_OPENVINO_VERSION",
+ None,
+ TRITON_VERSION_MAP[FLAGS.version][4],
+ )
+ ]
+ if target_platform() == "windows":
+ if "base" in images:
+ cargs.append(
+ cmake_backend_arg(
+ "openvino", "TRITON_BUILD_CONTAINER", None, images["base"]
+ )
+ )
+ else:
+ if "base" in images:
+ cargs.append(
+ cmake_backend_arg(
+ "openvino", "TRITON_BUILD_CONTAINER", None, images["base"]
+ )
+ )
+ else:
+ cargs.append(
+ cmake_backend_arg(
+ "openvino",
+ "TRITON_BUILD_CONTAINER_VERSION",
+ None,
+ TRITON_VERSION_MAP[FLAGS.version][1],
+ )
+ )
+ return cargs
+
+
+def tensorrt_cmake_args():
+ cargs = [
+ cmake_backend_enable("tensorrt", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx),
+ ]
+ if target_platform() == "windows":
+ cargs.append(
+ cmake_backend_arg(
+ "tensorrt", "TRITON_TENSORRT_INCLUDE_PATHS", None, "c:/TensorRT/include"
+ )
+ )
+
+ return cargs
+
+
+def tensorflow_cmake_args(images, library_paths):
+ backend_name = "tensorflow"
+ extra_args = []
+
+ # If a specific TF image is specified use it, otherwise pull from NGC.
+ if backend_name in images:
+ image = images[backend_name]
+ else:
+ image = "nvcr.io/nvidia/tensorflow:{}-tf2-py3".format(
+ FLAGS.upstream_container_version
+ )
+ extra_args = [
+ cmake_backend_arg(backend_name, "TRITON_TENSORFLOW_DOCKER_IMAGE", None, image)
+ ]
+ return extra_args
+
+
+def dali_cmake_args():
+ return [
+ cmake_backend_enable("dali", "TRITON_DALI_SKIP_DOWNLOAD", False),
+ ]
+
+
+def fil_cmake_args(images):
+ cargs = [cmake_backend_enable("fil", "TRITON_FIL_DOCKER_BUILD", True)]
+ if "base" in images:
+ cargs.append(
+ cmake_backend_arg("fil", "TRITON_BUILD_CONTAINER", None, images["base"])
+ )
+ else:
+ cargs.append(
+ cmake_backend_arg(
+ "fil",
+ "TRITON_BUILD_CONTAINER_VERSION",
+ None,
+ TRITON_VERSION_MAP[FLAGS.version][1],
+ )
+ )
+
+ return cargs
+
+
+def armnn_tflite_cmake_args():
+ return [
+ cmake_backend_arg("armnn_tflite", "JOBS", None, multiprocessing.cpu_count()),
+ ]
+
+
+def fastertransformer_cmake_args():
+ print("Warning: FasterTransformer backend is not officially supported.")
+ cargs = [
+ cmake_backend_arg(
+ "fastertransformer", "CMAKE_EXPORT_COMPILE_COMMANDS", None, 1
+ ),
+ cmake_backend_arg("fastertransformer", "ENABLE_FP8", None, "OFF"),
+ ]
+ return cargs
+
+
+def tensorrtllm_cmake_args(images):
+ cargs = [
+ cmake_backend_arg(
+ "tensorrtllm",
+ "TRT_LIB_DIR",
+ None,
+ "${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib",
+ ),
+ cmake_backend_arg(
+ "tensorrtllm", "TRT_INCLUDE_DIR", None, "${TRT_ROOT}/include"
+ ),
+ ]
+ cargs.append(cmake_backend_enable("tensorrtllm", "TRITON_BUILD", True))
+ return cargs
+
+
+def install_dcgm_libraries(dcgm_version, target_machine):
+ if dcgm_version == "":
+ fail(
+ "unable to determine default repo-tag, DCGM version not known for {}".format(
+ FLAGS.version
+ )
+ )
+ return ""
+ else:
+ if target_machine == "aarch64":
+ return """
+ENV DCGM_VERSION {}
+# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
+RUN curl -o /tmp/cuda-keyring.deb \\
+ https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/cuda-keyring_1.0-1_all.deb \\
+ && apt install /tmp/cuda-keyring.deb \\
+ && rm /tmp/cuda-keyring.deb \\
+ && apt-get update \\
+ && apt-get install -y datacenter-gpu-manager=1:{}
+""".format(
+ dcgm_version, dcgm_version
+ )
+ else:
+ return """
+ENV DCGM_VERSION {}
+# Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads
+RUN curl -o /tmp/cuda-keyring.deb \\
+ https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \\
+ && apt install /tmp/cuda-keyring.deb \\
+ && rm /tmp/cuda-keyring.deb \\
+ && apt-get update \\
+ && apt-get install -y datacenter-gpu-manager=1:{}
+""".format(
+ dcgm_version, dcgm_version
+ )
+
+
+def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
+ df = """
+ARG TRITON_VERSION={}
+ARG TRITON_CONTAINER_VERSION={}
+ARG BASE_IMAGE={}
+""".format(
+ argmap["TRITON_VERSION"],
+ argmap["TRITON_CONTAINER_VERSION"],
+ argmap["BASE_IMAGE"],
+ )
+
+ df += """
+FROM ${BASE_IMAGE}
+
+ARG TRITON_VERSION
+ARG TRITON_CONTAINER_VERSION
+"""
+ # Install the windows- or linux-specific buildbase dependencies
+ if target_platform() == "windows":
+ df += """
+SHELL ["cmd", "/S", "/C"]
+"""
+ else:
+ df += """
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install docker docker buildx
+RUN apt-get update \\
+ && apt-get install -y ca-certificates curl gnupg \\
+ && install -m 0755 -d /etc/apt/keyrings \\
+ && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg \\
+ && chmod a+r /etc/apt/keyrings/docker.gpg \\
+ && echo \\
+ "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \\
+ "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \\
+ tee /etc/apt/sources.list.d/docker.list > /dev/null \\
+ && apt-get update \\
+ && apt-get install -y docker.io docker-buildx-plugin
+
+# libcurl4-openssl-dev is needed for GCS
+# python3-dev is needed by Torchvision
+# python3-pip and libarchive-dev are needed by the python backend
+# libxml2-dev is needed for Azure Storage
+# scons is needed as a build dependency of the armnn_tflite backend
+RUN apt-get update \\
+ && apt-get install -y --no-install-recommends \\
+ ca-certificates \\
+ autoconf \\
+ automake \\
+ build-essential \\
+ git \\
+ gperf \\
+ libre2-dev \\
+ libssl-dev \\
+ libtool \\
+ libcurl4-openssl-dev \\
+ libb64-dev \\
+ libgoogle-perftools-dev \\
+ patchelf \\
+ python3-dev \\
+ python3-pip \\
+ python3-setuptools \\
+ rapidjson-dev \\
+ scons \\
+ software-properties-common \\
+ pkg-config \\
+ unzip \\
+ wget \\
+ zlib1g-dev \\
+ libarchive-dev \\
+ libxml2-dev \\
+ libnuma-dev \\
+ wget \\
+ && rm -rf /var/lib/apt/lists/*
+
+RUN pip3 install --upgrade pip \\
+ && pip3 install --upgrade \\
+ wheel \\
+ setuptools \\
+ docker \\
+ virtualenv
+
+# Install boost version >= 1.78 for boost::span
+# Current libboost-dev apt packages are < 1.78, so install from tar.gz
+RUN wget -O /tmp/boost.tar.gz \\
+ https://archives.boost.io/release/1.80.0/source/boost_1_80_0.tar.gz \\
+ && (cd /tmp && tar xzf boost.tar.gz) \\
+ && mv /tmp/boost_1_80_0/boost /usr/include/boost
+
+# Server build requires recent version of CMake (FetchContent required)
+RUN apt update -q=2 \\
+ && apt install -y gpg wget \\
+ && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null \\
+ && . /etc/os-release \\
+ && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $UBUNTU_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null \\
+ && apt-get update -q=2 \\
+ && apt-get install -y --no-install-recommends cmake=3.27.7* cmake-data=3.27.7*
+"""
+
+ if FLAGS.enable_gpu:
+ df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine())
+
+ df += """
+ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
+ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
+"""
+
+ # Copy in the triton source. We remove existing contents first in
+ # case the FROM container has something there already.
+ if target_platform() == "windows":
+ df += """
+WORKDIR /workspace
+RUN rmdir /S/Q * || exit 0
+COPY . .
+"""
+ else:
+ df += """
+WORKDIR /workspace
+RUN rm -fr *
+COPY . .
+ENTRYPOINT []
+"""
+
+ with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
+ dfile.write(df)
+
+
+def create_dockerfile_cibase(ddir, dockerfile_name, argmap):
+ df = """
+ARG TRITON_VERSION={}
+ARG TRITON_CONTAINER_VERSION={}
+ARG BASE_IMAGE={}
+""".format(
+ argmap["TRITON_VERSION"],
+ argmap["TRITON_CONTAINER_VERSION"],
+ argmap["BASE_IMAGE"],
+ )
+
+ df += """
+FROM ${BASE_IMAGE}
+
+ARG TRITON_VERSION
+ARG TRITON_CONTAINER_VERSION
+
+COPY build/ci /workspace
+
+WORKDIR /workspace
+
+ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
+ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
+"""
+
+ with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
+ dfile.write(df)
+
+
+def create_dockerfile_linux(
+ ddir, dockerfile_name, argmap, backends, repoagents, caches, endpoints
+):
+ df = """
+ARG TRITON_VERSION={}
+ARG TRITON_CONTAINER_VERSION={}
+ARG BASE_IMAGE={}
+
+""".format(
+ argmap["TRITON_VERSION"],
+ argmap["TRITON_CONTAINER_VERSION"],
+ argmap["BASE_IMAGE"],
+ )
+
+ # PyTorch and TensorFlow backends need extra CUDA and other
+ # dependencies during runtime that are missing in the CPU-only base container.
+ # These dependencies must be copied from the Triton Min image.
+ if not FLAGS.enable_gpu and (("pytorch" in backends) or ("tensorflow" in backends)):
+ df += """
+############################################################################
+## Triton Min image
+############################################################################
+FROM {} AS min_container
+
+""".format(
+ argmap["GPU_BASE_IMAGE"]
+ )
+
+ df += """
+############################################################################
+## Production stage: Create container with just inference server executable
+############################################################################
+FROM ${BASE_IMAGE}
+"""
+
+ df += dockerfile_prepare_container_linux(
+ argmap, backends, FLAGS.enable_gpu, target_machine()
+ )
+
+ df += """
+WORKDIR /opt
+COPY --chown=1000:1000 build/install tritonserver
+
+WORKDIR /opt/tritonserver
+COPY --chown=1000:1000 NVIDIA_Deep_Learning_Container_License.pdf .
+
+"""
+ if not FLAGS.no_core_build:
+ # Add feature labels for SageMaker endpoint
+ if "sagemaker" in endpoints:
+ df += """
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
+COPY --chown=1000:1000 docker/sagemaker/serve /usr/bin/.
+"""
+
+ # This is required since libcublasLt.so is not present during the build
+ # stage of the PyTorch backend
+ if not FLAGS.enable_gpu and ("pytorch" in backends):
+ df += """
+RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.12 backends/pytorch/libtorch_cuda.so
+"""
+ if "tensorrtllm" in backends:
+ df += """
+# Remove TRT contents that are not needed at runtime
+RUN ARCH="$(uname -i)" \\
+ && rm -fr ${TRT_ROOT}/bin ${TRT_ROOT}/targets/${ARCH}-linux-gnu/bin ${TRT_ROOT}/data \\
+ && rm -fr ${TRT_ROOT}/doc ${TRT_ROOT}/onnx_graphsurgeon ${TRT_ROOT}/python \\
+ && rm -fr ${TRT_ROOT}/samples ${TRT_ROOT}/targets/${ARCH}-linux-gnu/samples
+
+# Install required packages for TRT-LLM models
+RUN python3 -m pip install --upgrade pip \\
+ && pip3 install transformers
+
+# Uninstall unused nvidia packages
+RUN if pip freeze | grep -q "nvidia.*"; then \\
+ pip freeze | grep "nvidia.*" | xargs pip uninstall -y; \\
+ fi
+RUN pip cache purge
+
+# Drop the static libs
+RUN ARCH="$(uname -i)" \\
+ && rm -f ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvinfer*.a \\
+ ${TRT_ROOT}/targets/${ARCH}-linux-gnu/lib/libnvonnxparser_*.a
+
+ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib/:/opt/tritonserver/backends/tensorrtllm:$LD_LIBRARY_PATH
+"""
+ with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
+ dfile.write(df)
+
+
+def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_machine):
+ gpu_enabled = 1 if enable_gpu else 0
+ # Common steps to produce docker images shared by build.py and compose.py.
+ # Sets environment variables, installs dependencies and adds entrypoint
+ df = """
+ARG TRITON_VERSION
+ARG TRITON_CONTAINER_VERSION
+
+ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
+ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
+LABEL com.nvidia.tritonserver.version="${TRITON_SERVER_VERSION}"
+
+ENV PATH /opt/tritonserver/bin:${PATH}
+# Remove once https://github.com/openucx/ucx/pull/9148 is available
+# in the min container.
+ENV UCX_MEM_EVENTS no
+"""
+
+ # TODO Remove once the ORT-OpenVINO "Exception while Reading network" is fixed
+ if "onnxruntime" in backends:
+ df += """
+ENV LD_LIBRARY_PATH /opt/tritonserver/backends/onnxruntime:${LD_LIBRARY_PATH}
+"""
+
+ # Necessary for libtorch.so to find correct HPCX libraries
+ if "pytorch" in backends:
+ df += """
+ENV LD_LIBRARY_PATH /opt/hpcx/ucc/lib/:/opt/hpcx/ucx/lib/:${LD_LIBRARY_PATH}
+"""
+
+ backend_dependencies = ""
+ # libgomp1 is needed by both onnxruntime and pytorch backends
+ if ("onnxruntime" in backends) or ("pytorch" in backends):
+ backend_dependencies = "libgomp1"
+
+ # libgfortran5 is needed by pytorch backend on ARM
+ if ("pytorch" in backends) and (target_machine == "aarch64"):
+ backend_dependencies += " libgfortran5"
+ # openssh-server is needed for fastertransformer
+ if "fastertransformer" in backends:
+ backend_dependencies += " openssh-server"
+
+ df += """
+ENV TF_ADJUST_HUE_FUSED 1
+ENV TF_ADJUST_SATURATION_FUSED 1
+ENV TF_ENABLE_WINOGRAD_NONFUSED 1
+ENV TF_AUTOTUNE_THRESHOLD 2
+ENV TRITON_SERVER_GPU_ENABLED {gpu_enabled}
+
+# Create a user that can be used to run triton as
+# non-root. Make sure that this user is given ID 1000. All server
+# artifacts copied below are assigned to this user.
+ENV TRITON_SERVER_USER=triton-server
+RUN userdel tensorrt-server > /dev/null 2>&1 || true \\
+ && if ! id -u $TRITON_SERVER_USER > /dev/null 2>&1 ; then \\
+ useradd $TRITON_SERVER_USER; \\
+ fi \\
+ && [ `id -u $TRITON_SERVER_USER` -eq 1000 ] \\
+ && [ `id -g $TRITON_SERVER_USER` -eq 1000 ]
+
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Common dependencies. FIXME (can any of these be conditional? For
+# example libcurl only needed for GCS?)
+RUN apt-get update \\
+ && apt-get install -y --no-install-recommends \\
+ clang \\
+ curl \\
+ dirmngr \\
+ git \\
+ gperf \\
+ libb64-0d \\
+ libcurl4-openssl-dev \\
+ libgoogle-perftools-dev \\
+ libjemalloc-dev \\
+ libnuma-dev \\
+ libre2-9 \\
+ software-properties-common \\
+ wget \\
+ {backend_dependencies} \\
+ && rm -rf /var/lib/apt/lists/*
+
+# Set TCMALLOC_RELEASE_RATE for users setting LD_PRELOAD with tcmalloc
+ENV TCMALLOC_RELEASE_RATE 200
+""".format(
+ gpu_enabled=gpu_enabled, backend_dependencies=backend_dependencies
+ )
+
+ if "fastertransformer" in backends:
+ be = "fastertransformer"
+ url = "https://raw.githubusercontent.com/triton-inference-server/fastertransformer_backend/{}/docker/create_dockerfile_and_build.py".format(
+ backends[be]
+ )
+ response = requests.get(url)
+ spec = importlib.util.spec_from_loader(
+ "fastertransformer_buildscript", loader=None, origin=url
+ )
+ fastertransformer_buildscript = importlib.util.module_from_spec(spec)
+ exec(response.content, fastertransformer_buildscript.__dict__)
+ df += fastertransformer_buildscript.create_postbuild(is_multistage_build=False)
+
+ if enable_gpu:
+ df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine)
+ df += """
+# Extra defensive wiring for CUDA Compat lib
+RUN ln -sf ${_CUDA_COMPAT_PATH}/lib.real ${_CUDA_COMPAT_PATH}/lib \\
+ && echo ${_CUDA_COMPAT_PATH}/lib > /etc/ld.so.conf.d/00-cuda-compat.conf \\
+ && ldconfig \\
+ && rm -f ${_CUDA_COMPAT_PATH}/lib
+"""
+ else:
+ df += add_cpu_libs_to_linux_dockerfile(backends, target_machine)
+
+ # Add dependencies needed for python backend
+ if "python" in backends:
+ df += """
+# python3, python3-pip and some pip installs required for the python backend
+RUN apt-get update \\
+ && apt-get install -y --no-install-recommends \\
+ python3 \\
+ libarchive-dev \\
+ python3-pip \\
+ libpython3-dev \\
+ && pip3 install --upgrade pip \\
+ && pip3 install --upgrade \\
+ wheel \\
+ setuptools \\
+ numpy \\
+ virtualenv \\
+ && rm -rf /var/lib/apt/lists/*
+"""
+
+ if "vllm" in backends:
+ df += """
+# vLLM needed for vLLM backend
+RUN pip3 install vllm=={}
+""".format(
+ TRITON_VERSION_MAP[FLAGS.version][6]
+ )
+
+ df += """
+WORKDIR /opt/tritonserver
+RUN rm -fr /opt/tritonserver/*
+ENV NVIDIA_PRODUCT_NAME="Triton Server"
+COPY docker/entrypoint.d/ /opt/nvidia/entrypoint.d/
+"""
+
+    # The CPU-only build uses ubuntu as the base image, so the entrypoint
+    # files are not available in /opt/nvidia in the base image and we must
+    # provide them ourselves.
+ if not enable_gpu:
+ df += """
+COPY docker/cpu_only/ /opt/nvidia/
+ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
+"""
+
+ df += """
+ENV NVIDIA_BUILD_ID {}
+LABEL com.nvidia.build.id={}
+LABEL com.nvidia.build.ref={}
+""".format(
+ argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_REF"]
+ )
+
+ return df
+
+
+def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
+ df = ""
+ libs_arch = "aarch64" if target_machine == "aarch64" else "x86_64"
+ if "pytorch" in backends:
+ # Add extra dependencies for pytorch backend.
+ # Note: Even though the build is CPU-only, the version of pytorch
+        # we are using depends upon libraries like cuda and cudnn. Since
+ # these dependencies are not present in the ubuntu base image,
+ # we must copy these from the Triton min container ourselves.
+ cuda_arch = "sbsa" if target_machine == "aarch64" else "x86_64"
+ df += """
+RUN mkdir -p /usr/local/cuda/lib64/stubs
+COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusparse.so /usr/local/cuda/lib64/stubs/libcusparse.so.12
+COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.11
+COPY --from=min_container /usr/local/cuda/lib64/stubs/libcurand.so /usr/local/cuda/lib64/stubs/libcurand.so.10
+COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.11
+COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so /usr/local/cuda/lib64/stubs/libcublas.so.12
+COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.12
+COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.11
+
+RUN mkdir -p /usr/local/cuda/targets/{cuda_arch}-linux/lib
+COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+COPY --from=min_container /usr/local/cuda/lib64/libnvToolsExt.so.1 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+
+RUN mkdir -p /opt/hpcx/ucc/lib/ /opt/hpcx/ucx/lib/
+COPY --from=min_container /opt/hpcx/ucc/lib/libucc.so.1 /opt/hpcx/ucc/lib/libucc.so.1
+COPY --from=min_container /opt/hpcx/ucx/lib/libucm.so.0 /opt/hpcx/ucx/lib/libucm.so.0
+COPY --from=min_container /opt/hpcx/ucx/lib/libucp.so.0 /opt/hpcx/ucx/lib/libucp.so.0
+COPY --from=min_container /opt/hpcx/ucx/lib/libucs.so.0 /opt/hpcx/ucx/lib/libucs.so.0
+COPY --from=min_container /opt/hpcx/ucx/lib/libuct.so.0 /opt/hpcx/ucx/lib/libuct.so.0
+
+COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9
+
+# patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so
+RUN apt-get update \\
+ && apt-get install -y --no-install-recommends openmpi-bin patchelf
+
+ENV LD_LIBRARY_PATH /usr/local/cuda/targets/{cuda_arch}-linux/lib:/usr/local/cuda/lib64/stubs:${{LD_LIBRARY_PATH}}
+""".format(
+ cuda_arch=cuda_arch, libs_arch=libs_arch
+ )
+
+ if ("pytorch" in backends) or ("tensorflow" in backends):
+ # Add NCCL dependency for tensorflow/pytorch backend.
+ # Note: Even though the build is CPU-only, the version of
+ # tensorflow/pytorch we are using depends upon the NCCL library.
+ # Since this dependency is not present in the ubuntu base image,
+ # we must copy it from the Triton min container ourselves.
+ df += """
+COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libnccl.so.2 /usr/lib/{libs_arch}-linux-gnu/libnccl.so.2
+""".format(
+ libs_arch=libs_arch
+ )
+
+ return df
+
+
+def create_dockerfile_windows(
+ ddir, dockerfile_name, argmap, backends, repoagents, caches
+):
+ df = """
+ARG TRITON_VERSION={}
+ARG TRITON_CONTAINER_VERSION={}
+ARG BASE_IMAGE={}
+
+############################################################################
+## Production stage: Create container with just inference server executable
+############################################################################
+FROM ${{BASE_IMAGE}}
+
+ARG TRITON_VERSION
+ARG TRITON_CONTAINER_VERSION
+
+ENV TRITON_SERVER_VERSION ${{TRITON_VERSION}}
+ENV NVIDIA_TRITON_SERVER_VERSION ${{TRITON_CONTAINER_VERSION}}
+LABEL com.nvidia.tritonserver.version="${{TRITON_SERVER_VERSION}}"
+
+RUN setx path "%path%;C:\\opt\\tritonserver\\bin"
+
+""".format(
+ argmap["TRITON_VERSION"],
+ argmap["TRITON_CONTAINER_VERSION"],
+ argmap["BASE_IMAGE"],
+ )
+ df += """
+WORKDIR /opt
+RUN rmdir /S/Q tritonserver || exit 0
+COPY --chown=1000:1000 build/install tritonserver
+
+WORKDIR /opt/tritonserver
+COPY --chown=1000:1000 NVIDIA_Deep_Learning_Container_License.pdf .
+
+"""
+ df += """
+ENTRYPOINT []
+ENV NVIDIA_BUILD_ID {}
+LABEL com.nvidia.build.id={}
+LABEL com.nvidia.build.ref={}
+""".format(
+ argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_REF"]
+ )
+
+ with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
+ dfile.write(df)
+
+
+def create_build_dockerfiles(
+ container_build_dir, images, backends, repoagents, caches, endpoints
+):
+ if "base" in images:
+ base_image = images["base"]
+ elif target_platform() == "windows":
+ base_image = "mcr.microsoft.com/dotnet/framework/sdk:4.8"
+ elif FLAGS.enable_gpu:
+ base_image = "nvcr.io/nvidia/tritonserver:{}-py3-min".format(
+ FLAGS.upstream_container_version
+ )
+ else:
+ base_image = "ubuntu:22.04"
+
+ dockerfileargmap = {
+ "NVIDIA_BUILD_REF": "" if FLAGS.build_sha is None else FLAGS.build_sha,
+ "NVIDIA_BUILD_ID": "" if FLAGS.build_id is None else FLAGS.build_id,
+ "TRITON_VERSION": FLAGS.version,
+ "TRITON_CONTAINER_VERSION": FLAGS.container_version,
+ "BASE_IMAGE": base_image,
+ "DCGM_VERSION": ""
+ if FLAGS.version is None or FLAGS.version not in TRITON_VERSION_MAP
+ else TRITON_VERSION_MAP[FLAGS.version][5],
+ }
+
+    # For the CPU-only image we need to copy some CUDA libraries and dependencies
+    # since we are using PyTorch and TensorFlow containers that
+    # are not CPU-only.
+ if (
+ not FLAGS.enable_gpu
+ and (("pytorch" in backends) or ("tensorflow" in backends))
+ and (target_platform() != "windows")
+ ):
+ if "gpu-base" in images:
+ gpu_base_image = images["gpu-base"]
+ else:
+ gpu_base_image = "nvcr.io/nvidia/tritonserver:{}-py3-min".format(
+ FLAGS.upstream_container_version
+ )
+ dockerfileargmap["GPU_BASE_IMAGE"] = gpu_base_image
+
+ create_dockerfile_buildbase(
+ FLAGS.build_dir, "Dockerfile.buildbase", dockerfileargmap
+ )
+
+ if target_platform() == "windows":
+ create_dockerfile_windows(
+ FLAGS.build_dir,
+ "Dockerfile",
+ dockerfileargmap,
+ backends,
+ repoagents,
+ caches,
+ )
+ else:
+ create_dockerfile_linux(
+ FLAGS.build_dir,
+ "Dockerfile",
+ dockerfileargmap,
+ backends,
+ repoagents,
+ caches,
+ endpoints,
+ )
+
+    # Dockerfile used for creating the CI base image.
+ create_dockerfile_cibase(FLAGS.build_dir, "Dockerfile.cibase", dockerfileargmap)
+
+
+def create_docker_build_script(script_name, container_install_dir, container_ci_dir):
+ with BuildScript(
+ os.path.join(FLAGS.build_dir, script_name),
+ verbose=FLAGS.verbose,
+ desc=("Docker-based build script for Triton Inference Server"),
+ ) as docker_script:
+ #
+ # Build base image... tritonserver_buildbase
+ #
+ docker_script.commentln(8)
+ docker_script.comment("Create Triton base build image")
+ docker_script.comment(
+ "This image contains all dependencies necessary to build Triton"
+ )
+ docker_script.comment()
+
+ cachefrommap = [
+ "tritonserver_buildbase",
+ "tritonserver_buildbase_cache0",
+ "tritonserver_buildbase_cache1",
+ ]
+
+ baseargs = [
+ "docker",
+ "build",
+ "-t",
+ "tritonserver_buildbase",
+ "-f",
+ os.path.join(FLAGS.build_dir, "Dockerfile.buildbase"),
+ ]
+
+ if not FLAGS.no_container_pull:
+ baseargs += [
+ "--pull",
+ ]
+
+ # Windows docker runs in a VM and memory needs to be specified
+ # explicitly (at least for some configurations of docker).
+ if target_platform() == "windows":
+ if FLAGS.container_memory:
+ baseargs += ["--memory", FLAGS.container_memory]
+
+ baseargs += ["--cache-from={}".format(k) for k in cachefrommap]
+ baseargs += ["."]
+
+ docker_script.cwd(THIS_SCRIPT_DIR)
+ docker_script.cmd(baseargs, check_exitcode=True)
+
+ #
+ # Build...
+ #
+ docker_script.blankln()
+ docker_script.commentln(8)
+ docker_script.comment("Run build in tritonserver_buildbase container")
+ docker_script.comment("Mount a directory into the container where the install")
+ docker_script.comment("artifacts will be placed.")
+ docker_script.comment()
+
+ # Don't use '-v' to communicate the built artifacts out of the
+ # build, because we want this code to work even if run within
+ # Docker (i.e. docker-in-docker) and not just if run directly
+ # from host.
+ runargs = [
+ "docker",
+ "run",
+ "-w",
+ "/workspace/build",
+ "--name",
+ "tritonserver_builder",
+ ]
+
+ if not FLAGS.no_container_interactive:
+ runargs += ["-it"]
+
+ if target_platform() == "windows":
+ if FLAGS.container_memory:
+ runargs += ["--memory", FLAGS.container_memory]
+            runargs += ["-v", "\\\\.\\pipe\\docker_engine:\\\\.\\pipe\\docker_engine"]
+ else:
+ runargs += ["-v", "/var/run/docker.sock:/var/run/docker.sock"]
+
+ runargs += ["tritonserver_buildbase"]
+
+ if target_platform() == "windows":
+ runargs += ["powershell.exe", "-noexit", "-File", "./cmake_build.ps1"]
+ else:
+ runargs += ["./cmake_build"]
+
+ # Remove existing tritonserver_builder container...
+ if target_platform() == "windows":
+ docker_script.cmd(["docker", "rm", "tritonserver_builder"])
+ else:
+ docker_script._file.write(
+ 'if [ "$(docker ps -a | grep tritonserver_builder)" ]; then docker rm -f tritonserver_builder; fi\n'
+ )
+
+ docker_script.cmd(runargs, check_exitcode=True)
+
+ docker_script.cmd(
+ [
+ "docker",
+ "cp",
+ "tritonserver_builder:/tmp/tritonbuild/install",
+ FLAGS.build_dir,
+ ],
+ check_exitcode=True,
+ )
+ docker_script.cmd(
+ [
+ "docker",
+ "cp",
+ "tritonserver_builder:/tmp/tritonbuild/ci",
+ FLAGS.build_dir,
+ ],
+ check_exitcode=True,
+ )
+
+ #
+ # Final image... tritonserver
+ #
+ docker_script.blankln()
+ docker_script.commentln(8)
+ docker_script.comment("Create final tritonserver image")
+ docker_script.comment()
+
+ finalargs = [
+ "docker",
+ "build",
+ "-t",
+ "tritonserver",
+ "-f",
+ os.path.join(FLAGS.build_dir, "Dockerfile"),
+ ".",
+ ]
+
+ docker_script.cwd(THIS_SCRIPT_DIR)
+ docker_script.cmd(finalargs, check_exitcode=True)
+
+ #
+ # CI base image... tritonserver_cibase
+ #
+ docker_script.blankln()
+ docker_script.commentln(8)
+ docker_script.comment("Create CI base image")
+ docker_script.comment()
+
+ cibaseargs = [
+ "docker",
+ "build",
+ "-t",
+ "tritonserver_cibase",
+ "-f",
+ os.path.join(FLAGS.build_dir, "Dockerfile.cibase"),
+ ".",
+ ]
+
+ docker_script.cwd(THIS_SCRIPT_DIR)
+ docker_script.cmd(cibaseargs, check_exitcode=True)
+
+
+def core_build(
+ cmake_script, repo_dir, cmake_dir, build_dir, install_dir, components, backends
+):
+ repo_build_dir = os.path.join(build_dir, "tritonserver", "build")
+ repo_install_dir = os.path.join(build_dir, "tritonserver", "install")
+
+ cmake_script.commentln(8)
+ cmake_script.comment("Triton core library and tritonserver executable")
+ cmake_script.comment()
+ cmake_script.mkdir(repo_build_dir)
+ cmake_script.cwd(repo_build_dir)
+ cmake_script.cmake(
+ core_cmake_args(components, backends, cmake_dir, repo_install_dir)
+ )
+ cmake_script.makeinstall()
+
+ if target_platform() == "windows":
+ cmake_script.mkdir(os.path.join(install_dir, "bin"))
+ cmake_script.cp(
+ os.path.join(repo_install_dir, "bin", "tritonserver.exe"),
+ os.path.join(install_dir, "bin"),
+ )
+ cmake_script.cp(
+ os.path.join(repo_install_dir, "bin", "tritonserver.dll"),
+ os.path.join(install_dir, "bin"),
+ )
+ else:
+ cmake_script.mkdir(os.path.join(install_dir, "bin"))
+ cmake_script.cp(
+ os.path.join(repo_install_dir, "bin", "tritonserver"),
+ os.path.join(install_dir, "bin"),
+ )
+ cmake_script.mkdir(os.path.join(install_dir, "lib"))
+ cmake_script.cp(
+ os.path.join(repo_install_dir, "lib", "libtritonserver.so"),
+ os.path.join(install_dir, "lib"),
+ )
+        # [FIXME] Placing the Triton server wheel file in 'python' for now; it
+        # should be uploaded to a pip registry so it can be installed directly.
+ cmake_script.mkdir(os.path.join(install_dir, "python"))
+ cmake_script.cp(
+ os.path.join(repo_install_dir, "python", "tritonserver*.whl"),
+ os.path.join(install_dir, "python"),
+ )
+
+ cmake_script.mkdir(os.path.join(install_dir, "include", "triton"))
+ cmake_script.cpdir(
+ os.path.join(repo_install_dir, "include", "triton", "core"),
+ os.path.join(install_dir, "include", "triton", "core"),
+ )
+
+ cmake_script.cp(os.path.join(repo_dir, "LICENSE"), install_dir)
+ cmake_script.cp(os.path.join(repo_dir, "TRITON_VERSION"), install_dir)
+
+    # If requested, package the source code for all OSS used in the build.
+    # For Windows, Triton is not delivered as a container, so skip this
+    # step on the Windows platform.
+ if target_platform() != "windows":
+ if (
+ (not FLAGS.no_container_build)
+ and (not FLAGS.no_core_build)
+ and (not FLAGS.no_container_source)
+ ):
+ cmake_script.mkdir(os.path.join(install_dir, "third-party-src"))
+ cmake_script.cwd(repo_build_dir)
+ cmake_script.tar(
+ "third-party-src",
+ os.path.join(install_dir, "third-party-src", "src.tar.gz"),
+ )
+ cmake_script.cp(
+ os.path.join(repo_dir, "docker", "README.third-party-src"),
+ os.path.join(install_dir, "third-party-src", "README"),
+ )
+
+ cmake_script.comment()
+ cmake_script.comment("end Triton core library and tritonserver executable")
+ cmake_script.commentln(8)
+ cmake_script.blankln()
+
+
+def tensorrtllm_prebuild(cmake_script):
+ # Export the TRT_ROOT environment variable
+ cmake_script.cmd("export TRT_ROOT=/usr/local/tensorrt")
+ cmake_script.cmd("export ARCH=$(uname -m)")
+
+
+def backend_build(
+ be,
+ cmake_script,
+ tag,
+ build_dir,
+ install_dir,
+ github_organization,
+ images,
+ components,
+ library_paths,
+):
+ repo_build_dir = os.path.join(build_dir, be, "build")
+ repo_install_dir = os.path.join(build_dir, be, "install")
+
+ cmake_script.commentln(8)
+ cmake_script.comment(f"'{be}' backend")
+ cmake_script.comment("Delete this section to remove backend from build")
+ cmake_script.comment()
+ cmake_script.mkdir(build_dir)
+ cmake_script.cwd(build_dir)
+ cmake_script.gitclone(backend_repo(be), tag, be, github_organization)
+
+ if be == "tensorrtllm":
+ tensorrtllm_prebuild(cmake_script)
+
+ cmake_script.mkdir(repo_build_dir)
+ cmake_script.cwd(repo_build_dir)
+ cmake_script.cmake(
+ backend_cmake_args(images, components, be, repo_install_dir, library_paths)
+ )
+ cmake_script.makeinstall()
+
+ cmake_script.mkdir(os.path.join(install_dir, "backends"))
+ cmake_script.rmdir(os.path.join(install_dir, "backends", be))
+
+ cmake_script.cpdir(
+ os.path.join(repo_install_dir, "backends", be),
+ os.path.join(install_dir, "backends"),
+ )
+
+ cmake_script.comment()
+ cmake_script.comment(f"end '{be}' backend")
+ cmake_script.commentln(8)
+ cmake_script.blankln()
+
+
+def backend_clone(
+ be,
+ clone_script,
+ tag,
+ build_dir,
+ install_dir,
+ github_organization,
+):
+ clone_script.commentln(8)
+ clone_script.comment(f"'{be}' backend")
+ clone_script.comment("Delete this section to remove backend from build")
+ clone_script.comment()
+ clone_script.mkdir(build_dir)
+ clone_script.cwd(build_dir)
+ clone_script.gitclone(backend_repo(be), tag, be, github_organization)
+
+ repo_target_dir = os.path.join(install_dir, "backends")
+ clone_script.mkdir(repo_target_dir)
+ backend_dir = os.path.join(repo_target_dir, be)
+ clone_script.rmdir(backend_dir)
+ clone_script.mkdir(backend_dir)
+
+ clone_script.cp(
+ os.path.join(build_dir, be, "src", "model.py"),
+ backend_dir,
+ )
+
+ clone_script.comment()
+ clone_script.comment(f"end '{be}' backend")
+ clone_script.commentln(8)
+ clone_script.blankln()
+
+
+def repo_agent_build(
+ ra, cmake_script, build_dir, install_dir, repoagent_repo, repoagents
+):
+ repo_build_dir = os.path.join(build_dir, ra, "build")
+ repo_install_dir = os.path.join(build_dir, ra, "install")
+
+ cmake_script.commentln(8)
+ cmake_script.comment(f"'{ra}' repository agent")
+ cmake_script.comment("Delete this section to remove repository agent from build")
+ cmake_script.comment()
+ cmake_script.mkdir(build_dir)
+ cmake_script.cwd(build_dir)
+ cmake_script.gitclone(
+ repoagent_repo(ra), repoagents[ra], ra, FLAGS.github_organization
+ )
+
+ cmake_script.mkdir(repo_build_dir)
+ cmake_script.cwd(repo_build_dir)
+ cmake_script.cmake(repoagent_cmake_args(images, components, ra, repo_install_dir))
+ cmake_script.makeinstall()
+
+ cmake_script.mkdir(os.path.join(install_dir, "repoagents"))
+ cmake_script.rmdir(os.path.join(install_dir, "repoagents", ra))
+ cmake_script.cpdir(
+ os.path.join(repo_install_dir, "repoagents", ra),
+ os.path.join(install_dir, "repoagents"),
+ )
+ cmake_script.comment()
+ cmake_script.comment(f"end '{ra}' repository agent")
+ cmake_script.commentln(8)
+ cmake_script.blankln()
+
+
+def cache_build(cache, cmake_script, build_dir, install_dir, cache_repo, caches):
+ repo_build_dir = os.path.join(build_dir, cache, "build")
+ repo_install_dir = os.path.join(build_dir, cache, "install")
+
+ cmake_script.commentln(8)
+ cmake_script.comment(f"'{cache}' cache")
+ cmake_script.comment("Delete this section to remove cache from build")
+ cmake_script.comment()
+ cmake_script.mkdir(build_dir)
+ cmake_script.cwd(build_dir)
+ cmake_script.gitclone(
+ cache_repo(cache), caches[cache], cache, FLAGS.github_organization
+ )
+
+ cmake_script.mkdir(repo_build_dir)
+ cmake_script.cwd(repo_build_dir)
+ cmake_script.cmake(cache_cmake_args(images, components, cache, repo_install_dir))
+ cmake_script.makeinstall()
+
+ cmake_script.mkdir(os.path.join(install_dir, "caches"))
+ cmake_script.rmdir(os.path.join(install_dir, "caches", cache))
+ cmake_script.cpdir(
+ os.path.join(repo_install_dir, "caches", cache),
+ os.path.join(install_dir, "caches"),
+ )
+ cmake_script.comment()
+ cmake_script.comment(f"end '{cache}' cache")
+ cmake_script.commentln(8)
+ cmake_script.blankln()
+
+
+def cibase_build(
+ cmake_script, repo_dir, cmake_dir, build_dir, install_dir, ci_dir, backends
+):
+ repo_install_dir = os.path.join(build_dir, "tritonserver", "install")
+
+ cmake_script.commentln(8)
+ cmake_script.comment("Collect Triton CI artifacts")
+ cmake_script.comment()
+
+ cmake_script.mkdir(ci_dir)
+
+ # On windows we are not yet using a CI/QA docker image for
+ # testing, so don't do anything...
+ if target_platform() == "windows":
+ return
+
+ # The core build produces some artifacts that are needed for CI
+ # testing, so include those in the install.
+ cmake_script.cpdir(os.path.join(repo_dir, "qa"), ci_dir)
+ cmake_script.cpdir(os.path.join(repo_dir, "deploy"), ci_dir)
+ cmake_script.mkdir(os.path.join(ci_dir, "docs"))
+ cmake_script.cpdir(
+ os.path.join(repo_dir, "docs", "examples"), os.path.join(ci_dir, "docs")
+ )
+ cmake_script.mkdir(os.path.join(ci_dir, "src", "test"))
+ cmake_script.cpdir(
+ os.path.join(repo_dir, "src", "test", "models"),
+ os.path.join(ci_dir, "src", "test"),
+ )
+ # Skip copying the artifacts in the bin, lib, and python as those directories will
+ # be missing when the core build is not enabled.
+ if not FLAGS.no_core_build:
+ cmake_script.cpdir(os.path.join(repo_install_dir, "bin"), ci_dir)
+ cmake_script.mkdir(os.path.join(ci_dir, "lib"))
+ cmake_script.cp(
+ os.path.join(repo_install_dir, "lib", "libtritonrepoagent_relocation.so"),
+ os.path.join(ci_dir, "lib"),
+ )
+ cmake_script.cpdir(os.path.join(repo_install_dir, "python"), ci_dir)
+
+ # Some of the backends are needed for CI testing
+ cmake_script.mkdir(os.path.join(ci_dir, "backends"))
+ for be in ("identity", "repeat", "square"):
+ be_install_dir = os.path.join(build_dir, be, "install", "backends", be)
+ if target_platform() == "windows":
+ cmake_script.cmd(f"if (Test-Path -Path {be_install_dir}) {{")
+ else:
+ cmake_script.cmd(f"if [[ -e {be_install_dir} ]]; then")
+ cmake_script.cpdir(be_install_dir, os.path.join(ci_dir, "backends"))
+ cmake_script.cmd("}" if target_platform() == "windows" else "fi")
+
+ # Some of the unit-test built backends are needed for CI testing
+ cmake_script.mkdir(os.path.join(ci_dir, "tritonbuild", "tritonserver", "backends"))
+ for be in (
+ "query",
+ "implicit_state",
+ "sequence",
+ "dyna_sequence",
+ "distributed_addsub",
+ "iterative_sequence",
+ ):
+ be_install_dir = os.path.join(repo_install_dir, "backends", be)
+ if target_platform() == "windows":
+ cmake_script.cmd(f"if (Test-Path -Path {be_install_dir}) {{")
+ else:
+ cmake_script.cmd(f"if [[ -e {be_install_dir} ]]; then")
+ cmake_script.cpdir(
+ be_install_dir,
+ os.path.join(ci_dir, "tritonbuild", "tritonserver", "backends"),
+ )
+ cmake_script.cmd("}" if target_platform() == "windows" else "fi")
+
+ # The onnxruntime_backend build produces some artifacts that
+ # are needed for CI testing.
+ if "onnxruntime" in backends:
+ ort_install_dir = os.path.join(build_dir, "onnxruntime", "install")
+ cmake_script.mkdir(os.path.join(ci_dir, "qa", "L0_custom_ops"))
+ if target_platform() != "igpu":
+ cmake_script.cp(
+ os.path.join(ort_install_dir, "test", "libcustom_op_library.so"),
+ os.path.join(ci_dir, "qa", "L0_custom_ops"),
+ )
+ cmake_script.cp(
+ os.path.join(ort_install_dir, "test", "custom_op_test.onnx"),
+ os.path.join(ci_dir, "qa", "L0_custom_ops"),
+ )
+ # [WIP] other way than wildcard?
+ backend_tests = os.path.join(build_dir, "onnxruntime", "test", "*")
+ cmake_script.cpdir(backend_tests, os.path.join(ci_dir, "qa"))
+
+ # Need the build area for some backends so that they can be
+ # rebuilt with specific options.
+ cmake_script.mkdir(os.path.join(ci_dir, "tritonbuild"))
+ for be in ("identity", "python"):
+ if be in backends:
+ cmake_script.rmdir(os.path.join(build_dir, be, "build"))
+ cmake_script.rmdir(os.path.join(build_dir, be, "install"))
+ cmake_script.cpdir(
+ os.path.join(build_dir, be), os.path.join(ci_dir, "tritonbuild")
+ )
+
+ cmake_script.comment()
+ cmake_script.comment("end Triton CI artifacts")
+ cmake_script.commentln(8)
+ cmake_script.blankln()
+
+
+def finalize_build(cmake_script, install_dir, ci_dir):
+ cmake_script.cmd(f"chmod -R a+rw {install_dir}")
+ cmake_script.cmd(f"chmod -R a+rw {ci_dir}")
+
+
+def enable_all():
+ if target_platform() != "windows":
+ all_backends = [
+ "ensemble",
+ "identity",
+ "square",
+ "repeat",
+ "tensorflow",
+ "onnxruntime",
+ "python",
+ "dali",
+ "pytorch",
+ "openvino",
+ "fil",
+ "tensorrt",
+ ]
+ all_repoagents = ["checksum"]
+ all_caches = ["local", "redis"]
+ all_filesystems = ["gcs", "s3", "azure_storage"]
+ all_endpoints = ["http", "grpc", "sagemaker", "vertex-ai"]
+
+ FLAGS.enable_logging = True
+ FLAGS.enable_stats = True
+ FLAGS.enable_metrics = True
+ FLAGS.enable_gpu_metrics = True
+ FLAGS.enable_cpu_metrics = True
+ FLAGS.enable_tracing = True
+ FLAGS.enable_nvtx = True
+ FLAGS.enable_gpu = True
+ else:
+ all_backends = [
+ "ensemble",
+ "identity",
+ "square",
+ "repeat",
+ "onnxruntime",
+ "openvino",
+ "tensorrt",
+ ]
+ all_repoagents = ["checksum"]
+ all_caches = ["local", "redis"]
+ all_filesystems = []
+ all_endpoints = ["http", "grpc"]
+
+ FLAGS.enable_logging = True
+ FLAGS.enable_stats = True
+ FLAGS.enable_tracing = True
+ FLAGS.enable_gpu = True
+
+ requested_backends = []
+ for be in FLAGS.backend:
+ parts = be.split(":")
+ requested_backends += [parts[0]]
+ for be in all_backends:
+ if be not in requested_backends:
+ FLAGS.backend += [be]
+
+ requested_repoagents = []
+ for ra in FLAGS.repoagent:
+ parts = ra.split(":")
+ requested_repoagents += [parts[0]]
+ for ra in all_repoagents:
+ if ra not in requested_repoagents:
+ FLAGS.repoagent += [ra]
+
+ requested_caches = []
+ for cache in FLAGS.cache:
+ parts = cache.split(":")
+ requested_caches += [parts[0]]
+ for cache in all_caches:
+ if cache not in requested_caches:
+ FLAGS.cache += [cache]
+
+ for fs in all_filesystems:
+ if fs not in FLAGS.filesystem:
+ FLAGS.filesystem += [fs]
+
+ for ep in all_endpoints:
+ if ep not in FLAGS.endpoint:
+ FLAGS.endpoint += [ep]
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+
+ group_qv = parser.add_mutually_exclusive_group()
+ group_qv.add_argument(
+ "-q",
+ "--quiet",
+ action="store_true",
+ required=False,
+ help="Disable console output.",
+ )
+ group_qv.add_argument(
+ "-v",
+ "--verbose",
+ action="store_true",
+ required=False,
+ help="Enable verbose output.",
+ )
+
+ parser.add_argument(
+ "--dryrun",
+ action="store_true",
+ required=False,
+ help="Output the build scripts, but do not perform build.",
+ )
+ parser.add_argument(
+ "--no-container-build",
+ action="store_true",
+ required=False,
+ help="Do not use Docker container for build.",
+ )
+ parser.add_argument(
+ "--no-container-interactive",
+ action="store_true",
+ required=False,
+ help='Do not use -it argument to "docker run" when performing container build.',
+ )
+ parser.add_argument(
+ "--no-container-pull",
+ action="store_true",
+ required=False,
+ help="Do not use Docker --pull argument when building container.",
+ )
+ parser.add_argument(
+ "--container-memory",
+ default=None,
+ required=False,
+ help="Value for Docker --memory argument. Used only for windows builds.",
+ )
+ parser.add_argument(
+ "--target-platform",
+ required=False,
+ default=None,
+ help='Target platform for build, can be "linux", "windows" or "igpu". If not specified, build targets the current platform.',
+ )
+ parser.add_argument(
+ "--target-machine",
+ required=False,
+ default=None,
+ help="Target machine/architecture for build. If not specified, build targets the current machine/architecture.",
+ )
+
+ parser.add_argument(
+ "--build-id",
+ type=str,
+ required=False,
+ help="Build ID associated with the build.",
+ )
+ parser.add_argument(
+ "--build-sha", type=str, required=False, help="SHA associated with the build."
+ )
+ parser.add_argument(
+ "--build-dir",
+ type=str,
+ required=False,
+ help="Build directory. All repo clones and builds will be performed in this directory.",
+ )
+ parser.add_argument(
+ "--install-dir",
+ type=str,
+ required=False,
+ default=None,
+ help="Install directory, default is /opt/tritonserver.",
+ )
+ parser.add_argument(
+ "--cmake-dir",
+ type=str,
+ required=False,
+ help="Directory containing the CMakeLists.txt file for Triton server.",
+ )
+ parser.add_argument(
+ "--tmp-dir",
+ type=str,
+ required=False,
+ default="/tmp",
+ help="Temporary directory used for building inside docker. Default is /tmp.",
+ )
+ parser.add_argument(
+ "--library-paths",
+ action="append",
+ required=False,
+ default=None,
+        help="Specify library paths for respective backends in build as <backend-name>[:<library-path>].",
+ )
+ parser.add_argument(
+ "--build-type",
+ required=False,
+ default="Release",
+ help='Build type, one of "Release", "Debug", "RelWithDebInfo" or "MinSizeRel". Default is "Release".',
+ )
+ parser.add_argument(
+ "-j",
+ "--build-parallel",
+ type=int,
+ required=False,
+ default=None,
+ help="Build parallelism. Defaults to 2 * number-of-cores.",
+ )
+
+ parser.add_argument(
+ "--github-organization",
+ type=str,
+ required=False,
+ default="https://github.com/triton-inference-server",
+ help='The GitHub organization containing the repos used for the build. Defaults to "https://github.com/triton-inference-server".',
+ )
+ parser.add_argument(
+ "--version",
+ type=str,
+ required=False,
+ help="The Triton version. If not specified defaults to the value in the TRITON_VERSION file.",
+ )
+ parser.add_argument(
+ "--container-version",
+ type=str,
+ required=False,
+ help="The Triton container version to build. If not specified the container version will be chosen automatically based on --version value.",
+ )
+ parser.add_argument(
+ "--upstream-container-version",
+ type=str,
+ required=False,
+ help="The upstream container version to use for the build. If not specified the upstream container version will be chosen automatically based on --version value.",
+ )
+ parser.add_argument(
+ "--container-prebuild-command",
+ type=str,
+ required=False,
+        help="When performing a container build, this command will be executed within the container just before the build is performed.",
+ )
+ parser.add_argument(
+ "--no-container-source",
+ action="store_true",
+ required=False,
+ help="Do not include OSS source code in Docker container.",
+ )
+ parser.add_argument(
+ "--image",
+ action="append",
+ required=False,
+        help='Use specified Docker image in build as <image-name>,<image>. <image-name> can be "base", "gpu-base", "tensorflow", or "pytorch".',
+ )
+
+ parser.add_argument(
+ "--enable-all",
+ action="store_true",
+ required=False,
+ help="Enable all standard released Triton features, backends, repository agents, caches, endpoints and file systems.",
+ )
+ parser.add_argument(
+ "--enable-logging", action="store_true", required=False, help="Enable logging."
+ )
+ parser.add_argument(
+ "--enable-stats",
+ action="store_true",
+ required=False,
+ help="Enable statistics collection.",
+ )
+ parser.add_argument(
+ "--enable-metrics",
+ action="store_true",
+ required=False,
+ help="Enable metrics reporting.",
+ )
+ parser.add_argument(
+ "--enable-gpu-metrics",
+ action="store_true",
+ required=False,
+ help="Include GPU metrics in reported metrics.",
+ )
+ parser.add_argument(
+ "--enable-cpu-metrics",
+ action="store_true",
+ required=False,
+ help="Include CPU metrics in reported metrics.",
+ )
+ parser.add_argument(
+ "--enable-tracing", action="store_true", required=False, help="Enable tracing."
+ )
+ parser.add_argument(
+ "--enable-nvtx", action="store_true", required=False, help="Enable NVTX."
+ )
+ parser.add_argument(
+ "--enable-gpu", action="store_true", required=False, help="Enable GPU support."
+ )
+ parser.add_argument(
+ "--enable-mali-gpu",
+ action="store_true",
+ required=False,
+ help="Enable ARM MALI GPU support.",
+ )
+ parser.add_argument(
+ "--min-compute-capability",
+ type=str,
+ required=False,
+ default="6.0",
+ help="Minimum CUDA compute capability supported by server.",
+ )
+
+ parser.add_argument(
+ "--endpoint",
+ action="append",
+ required=False,
+ help='Include specified endpoint in build. Allowed values are "grpc", "http", "vertex-ai" and "sagemaker".',
+ )
+ parser.add_argument(
+ "--filesystem",
+ action="append",
+ required=False,
+ help='Include specified filesystem in build. Allowed values are "gcs", "azure_storage" and "s3".',
+ )
+ parser.add_argument(
+ "--no-core-build",
+ action="store_true",
+ required=False,
+ help="Do not build Triton core shared library or executable.",
+ )
+ parser.add_argument(
+ "--backend",
+ action="append",
+ required=False,
+        help='Include specified backend in build as <backend-name>[:<repo-tag>]. If <repo-tag> starts with "pull/" then it refers to a pull-request reference, otherwise <repo-tag> indicates the git tag/branch to use for the build. If the version is non-development then the default is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default is "main" (e.g. version YY.MMdev -> branch main).',
+ )
+ parser.add_argument(
+ "--repo-tag",
+ action="append",
+ required=False,
+        help='The version of a component to use in the build as <component-name>:<repo-tag>. <component-name> can be "common", "core", "backend" or "thirdparty". <repo-tag> indicates the git tag/branch to use for the build. Currently <repo-tag> does not support pull-request reference. If the version is non-development then the default is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default is "main" (e.g. version YY.MMdev -> branch main).',
+ )
+ parser.add_argument(
+ "--repoagent",
+ action="append",
+ required=False,
+        help='Include specified repo agent in build as <repoagent-name>[:<repo-tag>]. If <repo-tag> starts with "pull/" then it refers to a pull-request reference, otherwise <repo-tag> indicates the git tag/branch to use for the build. If the version is non-development then the default is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default is "main" (e.g. version YY.MMdev -> branch main).',
+ )
+ parser.add_argument(
+ "--cache",
+ action="append",
+ required=False,
+        help='Include specified cache in build as <cache-name>[:<repo-tag>]. If <repo-tag> starts with "pull/" then it refers to a pull-request reference, otherwise <repo-tag> indicates the git tag/branch to use for the build. If the version is non-development then the default is the release branch matching the container version (e.g. version YY.MM -> branch rYY.MM); otherwise the default is "main" (e.g. version YY.MMdev -> branch main).',
+ )
+ parser.add_argument(
+ "--no-force-clone",
+ action="store_true",
+ default=False,
+ help="Do not create fresh clones of repos that have already been cloned.",
+ )
+ parser.add_argument(
+ "--extra-core-cmake-arg",
+ action="append",
+ required=False,
+        help="Extra CMake argument as <name>=<value>. The argument is passed to CMake as -D<name>=<value> and is included after all CMake arguments added by build.py for the core builds.",
+ )
+ parser.add_argument(
+ "--override-core-cmake-arg",
+ action="append",
+ required=False,
+        help="Override specified CMake argument in the build as <name>=<value>. The argument is passed to CMake as -D<name>=<value>. This flag only impacts CMake arguments that are used by build.py. To unconditionally add a CMake argument to the core build use --extra-core-cmake-arg.",
+ )
+ parser.add_argument(
+ "--extra-backend-cmake-arg",
+ action="append",
+ required=False,
+        help="Extra CMake argument for a backend build as <backend>:<name>=<value>. The argument is passed to CMake as -D<name>=<value> and is included after all CMake arguments added by build.py for the backend.",
+ )
+ parser.add_argument(
+ "--override-backend-cmake-arg",
+ action="append",
+ required=False,
+        help="Override specified backend CMake argument in the build as <backend>:<name>=<value>. The argument is passed to CMake as -D<name>=<value>. This flag only impacts CMake arguments that are used by build.py. To unconditionally add a CMake argument to the backend build use --extra-backend-cmake-arg.",
+ )
+
+ FLAGS = parser.parse_args()
+
+ if FLAGS.image is None:
+ FLAGS.image = []
+ if FLAGS.repo_tag is None:
+ FLAGS.repo_tag = []
+ if FLAGS.backend is None:
+ FLAGS.backend = []
+ if FLAGS.endpoint is None:
+ FLAGS.endpoint = []
+ if FLAGS.filesystem is None:
+ FLAGS.filesystem = []
+ if FLAGS.repoagent is None:
+ FLAGS.repoagent = []
+ if FLAGS.cache is None:
+ FLAGS.cache = []
+ if FLAGS.library_paths is None:
+ FLAGS.library_paths = []
+ if FLAGS.extra_core_cmake_arg is None:
+ FLAGS.extra_core_cmake_arg = []
+ if FLAGS.override_core_cmake_arg is None:
+ FLAGS.override_core_cmake_arg = []
+ if FLAGS.override_backend_cmake_arg is None:
+ FLAGS.override_backend_cmake_arg = []
+ if FLAGS.extra_backend_cmake_arg is None:
+ FLAGS.extra_backend_cmake_arg = []
+
+ # if --enable-all is specified, then update FLAGS to enable all
+ # settings, backends, repo-agents, caches, file systems, endpoints, etc.
+ if FLAGS.enable_all:
+ enable_all()
+
+ # When doing a docker build, --build-dir, --install-dir and
+ # --cmake-dir must not be set. We will use the build/ subdir
+ # within the server/ repo that contains this build.py script for
+ # --build-dir. If not doing a docker build, --build-dir must be
+ # set.
+ if FLAGS.no_container_build:
+ if FLAGS.build_dir is None:
+ fail("--no-container-build requires --build-dir")
+ if FLAGS.install_dir is None:
+ FLAGS.install_dir = os.path.join(FLAGS.build_dir, "opt", "tritonserver")
+ if FLAGS.cmake_dir is None:
+ FLAGS.cmake_dir = THIS_SCRIPT_DIR
+ else:
+ if FLAGS.build_dir is not None:
+ fail("--build-dir must not be set for container-based build")
+ if FLAGS.install_dir is not None:
+ fail("--install-dir must not be set for container-based build")
+ if FLAGS.cmake_dir is not None:
+ fail("--cmake-dir must not be set for container-based build")
+ FLAGS.build_dir = os.path.join(THIS_SCRIPT_DIR, "build")
+
+ # Determine the versions. Start with Triton version, if --version
+ # is not explicitly specified read from TRITON_VERSION file.
+ if FLAGS.version is None:
+ with open(os.path.join(THIS_SCRIPT_DIR, "TRITON_VERSION"), "r") as vfile:
+ FLAGS.version = vfile.readline().strip()
+
+ if FLAGS.build_parallel is None:
+ FLAGS.build_parallel = multiprocessing.cpu_count() * 2
+
+ log("Building Triton Inference Server")
+ log("platform {}".format(target_platform()))
+ log("machine {}".format(target_machine()))
+ log("version {}".format(FLAGS.version))
+ log("build dir {}".format(FLAGS.build_dir))
+ log("install dir {}".format(FLAGS.install_dir))
+ log("cmake dir {}".format(FLAGS.cmake_dir))
+
+ # Determine the default repo-tag that should be used for images,
+ # backends, repo-agents, and caches if a repo-tag is not given
+ # explicitly. For release branches we use the release branch as
+ # the default, otherwise we use 'main'.
+ default_repo_tag = "main"
+ cver = FLAGS.container_version
+ if cver is None:
+ if FLAGS.version not in TRITON_VERSION_MAP:
+ fail(
+ "unable to determine default repo-tag, container version not known for {}".format(
+ FLAGS.version
+ )
+ )
+ cver = TRITON_VERSION_MAP[FLAGS.version][0]
+ if not cver.endswith("dev"):
+ default_repo_tag = "r" + cver
+ log("default repo-tag: {}".format(default_repo_tag))
+
+ # For other versions use the TRITON_VERSION_MAP unless explicitly
+ # given.
+ FLAGS.container_version, FLAGS.upstream_container_version = container_versions(
+ FLAGS.version, FLAGS.container_version, FLAGS.upstream_container_version
+ )
+
+ log("container version {}".format(FLAGS.container_version))
+ log("upstream container version {}".format(FLAGS.upstream_container_version))
+
+ for ep in FLAGS.endpoint:
+ log(f'endpoint "{ep}"')
+ for fs in FLAGS.filesystem:
+ log(f'filesystem "{fs}"')
+
+ # Initialize map of backends to build and repo-tag for each.
+ backends = {}
+ for be in FLAGS.backend:
+ parts = be.split(":")
+ if len(parts) == 1:
+ parts.append(default_repo_tag)
+ if parts[0] == "tensorflow1":
+ fail(
+                "Starting from Triton version 23.04, support for TensorFlow 1 has been discontinued. Please switch to TensorFlow 2."
+ )
+ if parts[0] == "tensorflow2":
+ parts[0] = "tensorflow"
+ log('backend "{}" at tag/branch "{}"'.format(parts[0], parts[1]))
+ backends[parts[0]] = parts[1]
+
+ if "vllm" in backends:
+ if "python" not in backends:
+ log(
+ "vLLM backend requires Python backend, adding Python backend with tag {}".format(
+ backends["vllm"]
+ )
+ )
+ backends["python"] = backends["vllm"]
+
+ # Initialize map of repo agents to build and repo-tag for each.
+ repoagents = {}
+ for be in FLAGS.repoagent:
+ parts = be.split(":")
+ if len(parts) == 1:
+ parts.append(default_repo_tag)
+ log('repoagent "{}" at tag/branch "{}"'.format(parts[0], parts[1]))
+ repoagents[parts[0]] = parts[1]
+
+ # Initialize map of caches to build and repo-tag for each.
+ caches = {}
+ for be in FLAGS.cache:
+ parts = be.split(":")
+ if len(parts) == 1:
+ parts.append(default_repo_tag)
+ log('cache "{}" at tag/branch "{}"'.format(parts[0], parts[1]))
+ caches[parts[0]] = parts[1]
+
+ # Initialize map of docker images.
+ images = {}
+ for img in FLAGS.image:
+ parts = img.split(",")
+ fail_if(
+            len(parts) != 2, "--image must specify <image-name>,<image>"
+ )
+ fail_if(
+ parts[0]
+ not in ["base", "gpu-base", "pytorch", "tensorflow", "tensorflow2"],
+ "unsupported value for --image",
+ )
+ log('image "{}": "{}"'.format(parts[0], parts[1]))
+ if parts[0] == "tensorflow2":
+ parts[0] = "tensorflow"
+ images[parts[0]] = parts[1]
+
+ # Initialize map of library paths for each backend.
+ library_paths = {}
+ for lpath in FLAGS.library_paths:
+ parts = lpath.split(":")
+ if len(parts) == 2:
+ log('backend "{}" library path "{}"'.format(parts[0], parts[1]))
+ if parts[0] == "tensorflow2":
+ parts[0] = "tensorflow"
+ library_paths[parts[0]] = parts[1]
+
+ # Parse any explicitly specified cmake arguments
+ for cf in FLAGS.extra_core_cmake_arg:
+ parts = cf.split("=")
+        fail_if(len(parts) != 2, "--extra-core-cmake-arg must specify <name>=<value>")
+ log('CMake core extra "-D{}={}"'.format(parts[0], parts[1]))
+ EXTRA_CORE_CMAKE_FLAGS[parts[0]] = parts[1]
+
+ for cf in FLAGS.override_core_cmake_arg:
+ parts = cf.split("=")
+ fail_if(
+            len(parts) != 2, "--override-core-cmake-arg must specify <name>=<value>"
+ )
+ log('CMake core override "-D{}={}"'.format(parts[0], parts[1]))
+ OVERRIDE_CORE_CMAKE_FLAGS[parts[0]] = parts[1]
+
+ for cf in FLAGS.extra_backend_cmake_arg:
+ parts = cf.split(":", 1)
+ fail_if(
+ len(parts) != 2,
+            "--extra-backend-cmake-arg must specify <backend>:<name>=<value>",
+ )
+ be = parts[0]
+ parts = parts[1].split("=", 1)
+ fail_if(
+ len(parts) != 2,
+            "--extra-backend-cmake-arg must specify <backend>:<name>=<value>",
+ )
+ fail_if(
+ be not in backends,
+ '--extra-backend-cmake-arg specifies backend "{}" which is not included in build'.format(
+ be
+ ),
+ )
+ log('backend "{}" CMake extra "-D{}={}"'.format(be, parts[0], parts[1]))
+ if be not in EXTRA_BACKEND_CMAKE_FLAGS:
+ EXTRA_BACKEND_CMAKE_FLAGS[be] = {}
+ EXTRA_BACKEND_CMAKE_FLAGS[be][parts[0]] = parts[1]
+
+ for cf in FLAGS.override_backend_cmake_arg:
+ parts = cf.split(":", 1)
+ fail_if(
+ len(parts) != 2,
+            "--override-backend-cmake-arg must specify <backend>:<name>=<value>",
+ )
+ be = parts[0]
+ parts = parts[1].split("=", 1)
+ fail_if(
+ len(parts) != 2,
+            "--override-backend-cmake-arg must specify <backend>:<name>=<value>",
+ )
+ fail_if(
+ be not in backends,
+ '--override-backend-cmake-arg specifies backend "{}" which is not included in build'.format(
+ be
+ ),
+ )
+ log('backend "{}" CMake override "-D{}={}"'.format(be, parts[0], parts[1]))
+ if be not in OVERRIDE_BACKEND_CMAKE_FLAGS:
+ OVERRIDE_BACKEND_CMAKE_FLAGS[be] = {}
+ OVERRIDE_BACKEND_CMAKE_FLAGS[be][parts[0]] = parts[1]
+
+ # Initialize map of common components and repo-tag for each.
+ components = {
+ "common": default_repo_tag,
+ "core": default_repo_tag,
+ "backend": default_repo_tag,
+ "thirdparty": default_repo_tag,
+ }
+ for be in FLAGS.repo_tag:
+ parts = be.split(":")
+        fail_if(len(parts) != 2, "--repo-tag must specify <component-name>:<repo-tag>")
+ fail_if(
+ parts[0] not in components,
+            '--repo-tag <component-name> must be "common", "core", "backend", or "thirdparty"',
+ )
+ components[parts[0]] = parts[1]
+ for c in components:
+ log('component "{}" at tag/branch "{}"'.format(c, components[c]))
+
+ # Set the build, install, and cmake directories to use for the
+ # generated build scripts and Dockerfiles. If building without
+ # Docker, these are the directories specified on the cmdline. If
+ # building with Docker, we change these to be directories within
+ # FLAGS.tmp_dir inside the Docker container.
+ script_repo_dir = THIS_SCRIPT_DIR
+ script_build_dir = FLAGS.build_dir
+ script_install_dir = script_ci_dir = FLAGS.install_dir
+ script_cmake_dir = FLAGS.cmake_dir
+ if not FLAGS.no_container_build:
+ # FLAGS.tmp_dir may be specified with "\" on Windows, adjust
+ # to "/" for docker usage.
+ script_build_dir = os.path.normpath(
+ os.path.join(FLAGS.tmp_dir, "tritonbuild").replace("\\", "/")
+ )
+ script_install_dir = os.path.normpath(os.path.join(script_build_dir, "install"))
+ script_ci_dir = os.path.normpath(os.path.join(script_build_dir, "ci"))
+ if target_platform() == "windows":
+ script_repo_dir = script_cmake_dir = os.path.normpath("c:/workspace")
+ else:
+ script_repo_dir = script_cmake_dir = "/workspace"
+
+ script_name = "cmake_build"
+ if target_platform() == "windows":
+ script_name += ".ps1"
+
+ # Write the build script that invokes cmake for the core, backends, repo-agents, and caches.
+ pathlib.Path(FLAGS.build_dir).mkdir(parents=True, exist_ok=True)
+ with BuildScript(
+ os.path.join(FLAGS.build_dir, script_name),
+ verbose=FLAGS.verbose,
+ desc=("Build script for Triton Inference Server"),
+ ) as cmake_script:
+ # Run the container pre-build command if the cmake build is
+ # being done within the build container.
+ if not FLAGS.no_container_build and FLAGS.container_prebuild_command:
+ cmake_script.cmd(FLAGS.container_prebuild_command, check_exitcode=True)
+ cmake_script.blankln()
+
+ # Commands to build the core shared library and the server executable.
+ if not FLAGS.no_core_build:
+ core_build(
+ cmake_script,
+ script_repo_dir,
+ script_cmake_dir,
+ script_build_dir,
+ script_install_dir,
+ components,
+ backends,
+ )
+
+ # Commands to build each backend...
+ for be in backends:
+ # Core backends are not built separately from core so skip...
+ if be in CORE_BACKENDS:
+ continue
+
+ # If armnn_tflite backend, source from external repo for git clone
+ if be == "armnn_tflite":
+ github_organization = "https://gitlab.com/arm-research/smarter/"
+ else:
+ github_organization = FLAGS.github_organization
+
+ if be == "vllm":
+ backend_clone(
+ be,
+ cmake_script,
+ backends[be],
+ script_build_dir,
+ script_install_dir,
+ github_organization,
+ )
+ else:
+ backend_build(
+ be,
+ cmake_script,
+ backends[be],
+ script_build_dir,
+ script_install_dir,
+ github_organization,
+ images,
+ components,
+ library_paths,
+ )
+
+ # Commands to build each repo agent...
+ for ra in repoagents:
+ repo_agent_build(
+ ra,
+ cmake_script,
+ script_build_dir,
+ script_install_dir,
+ repoagent_repo,
+ repoagents,
+ )
+
+ # Commands to build each cache...
+ for cache in caches:
+ cache_build(
+ cache,
+ cmake_script,
+ script_build_dir,
+ script_install_dir,
+ cache_repo,
+ caches,
+ )
+
+ # Commands needed only when building with Docker...
+ if not FLAGS.no_container_build:
+ # Commands to collect all the build artifacts needed for CI
+ # testing.
+ cibase_build(
+ cmake_script,
+ script_repo_dir,
+ script_cmake_dir,
+ script_build_dir,
+ script_install_dir,
+ script_ci_dir,
+ backends,
+ )
+
+ # When building with Docker the install and ci artifacts
+ # written to the build-dir while running the docker container
+ # may have root ownership, so give them permissions to be
+ # managed by all users on the host system.
+ if target_platform() != "windows":
+ finalize_build(cmake_script, script_install_dir, script_ci_dir)
+
+ # If --no-container-build is not specified then we perform the
+ # actual build within a docker container and from that create the
+ # final tritonserver docker image. For the build we need to
+ # generate a few Dockerfiles and a top-level script that drives
+ # the build process.
+ if not FLAGS.no_container_build:
+ script_name = "docker_build"
+ if target_platform() == "windows":
+ script_name += ".ps1"
+
+ create_build_dockerfiles(
+ script_build_dir, images, backends, repoagents, caches, FLAGS.endpoint
+ )
+ create_docker_build_script(script_name, script_install_dir, script_ci_dir)
+
+    # If not a dry-run, execute the script to perform the build... If a
+    # container-based build is requested use the 'docker_build' script,
+    # otherwise build directly on this system using the cmake script.
+ if not FLAGS.dryrun:
+ if target_platform() == "windows":
+ p = subprocess.Popen(
+ ["powershell.exe", "-noexit", "-File", f"./{script_name}"],
+ cwd=FLAGS.build_dir,
+ )
+ else:
+ p = subprocess.Popen([f"./{script_name}"], cwd=FLAGS.build_dir)
+ p.wait()
+ fail_if(p.returncode != 0, "build failed")
diff --git a/compose.py b/compose.py
new file mode 100755
index 0000000000..14b58c93f6
--- /dev/null
+++ b/compose.py
@@ -0,0 +1,525 @@
+#!/usr/bin/env python3
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import argparse
+import os
+import platform
+import subprocess
+import sys
+
+FLAGS = None
+
+
+#### helper functions
+def log(msg, force=False):
+ if force or not FLAGS.quiet:
+ try:
+ print(msg, file=sys.stderr)
+ except Exception:
+ print("", file=sys.stderr)
+
+
+def log_verbose(msg):
+ if FLAGS.verbose:
+ log(msg, force=True)
+
+
+def fail(msg):
+ print("error: {}".format(msg), file=sys.stderr)
+ sys.exit(1)
+
+
+def fail_if(p, msg):
+ if p:
+ fail(msg)
+
+
+def start_dockerfile(ddir, images, argmap, dockerfile_name, backends):
+ # Set environment variables, set default user and install dependencies
+ df = """
+#
+# Multistage build.
+#
+ARG TRITON_VERSION={}
+ARG TRITON_CONTAINER_VERSION={}
+
+FROM {} AS full
+""".format(
+ argmap["TRITON_VERSION"], argmap["TRITON_CONTAINER_VERSION"], images["full"]
+ )
+
+ # PyTorch, TensorFlow backends need extra CUDA and other
+ # dependencies during runtime that are missing in the CPU-only base container.
+ # These dependencies must be copied from the Triton Min image.
+ if not FLAGS.enable_gpu and (
+ ("pytorch" in backends)
+ or ("tensorflow" in backends)
+ or ("tensorflow2" in backends)
+ ):
+ df += """
+FROM {} AS min_container
+
+""".format(
+ images["gpu-min"]
+ )
+
+ df += """
+FROM {}
+""".format(
+ images["min"]
+ )
+
+ import build
+
+ df += build.dockerfile_prepare_container_linux(
+ argmap, backends, FLAGS.enable_gpu, platform.machine().lower()
+ )
+ # Copy over files
+ df += """
+WORKDIR /opt/tritonserver
+COPY --chown=1000:1000 --from=full /opt/tritonserver/LICENSE .
+COPY --chown=1000:1000 --from=full /opt/tritonserver/TRITON_VERSION .
+COPY --chown=1000:1000 --from=full /opt/tritonserver/NVIDIA_Deep_Learning_Container_License.pdf .
+COPY --chown=1000:1000 --from=full /opt/tritonserver/bin bin/
+COPY --chown=1000:1000 --from=full /opt/tritonserver/lib lib/
+COPY --chown=1000:1000 --from=full /opt/tritonserver/include include/
+"""
+ with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
+ dfile.write(df)
+
+
+def add_requested_backends(ddir, dockerfile_name, backends):
+ df = "# Copying over backends \n"
+ for backend in backends:
+ df += """COPY --chown=1000:1000 --from=full /opt/tritonserver/backends/{} /opt/tritonserver/backends/{}
+""".format(
+ backend, backend
+ )
+ if len(backends) > 0:
+ df += """
+# Top-level /opt/tritonserver/backends not copied so need to explicitly set permissions here
+RUN chown triton-server:triton-server /opt/tritonserver/backends
+"""
+ with open(os.path.join(ddir, dockerfile_name), "a") as dfile:
+ dfile.write(df)
+
+
+def add_requested_repoagents(ddir, dockerfile_name, repoagents):
+ df = "# Copying over repoagents \n"
+ for ra in repoagents:
+ df += """COPY --chown=1000:1000 --from=full /opt/tritonserver/repoagents/{} /opt/tritonserver/repoagents/{}
+""".format(
+ ra, ra
+ )
+ if len(repoagents) > 0:
+ df += """
+# Top-level /opt/tritonserver/repoagents not copied so need to explicitly set permissions here
+RUN chown triton-server:triton-server /opt/tritonserver/repoagents
+"""
+ with open(os.path.join(ddir, dockerfile_name), "a") as dfile:
+ dfile.write(df)
+
+
+def add_requested_caches(ddir, dockerfile_name, caches):
+ df = "# Copying over caches \n"
+ for cache in caches:
+ df += """COPY --chown=1000:1000 --from=full /opt/tritonserver/caches/{} /opt/tritonserver/caches/{}
+""".format(
+ cache, cache
+ )
+ if len(caches) > 0:
+ df += """
+# Top-level /opt/tritonserver/caches not copied so need to explicitly set permissions here
+RUN chown triton-server:triton-server /opt/tritonserver/caches
+"""
+ with open(os.path.join(ddir, dockerfile_name), "a") as dfile:
+ dfile.write(df)
+
+
+def end_dockerfile(ddir, dockerfile_name, argmap):
+ # Install additional dependencies
+ df = ""
+ if argmap["SAGEMAKER_ENDPOINT"]:
+ df += """
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+COPY --chown=1000:1000 --from=full /usr/bin/serve /usr/bin/.
+"""
+ with open(os.path.join(ddir, dockerfile_name), "a") as dfile:
+ dfile.write(df)
+
+
+def build_docker_image(ddir, dockerfile_name, container_name):
+ # Create container with docker build
+ p = subprocess.Popen(
+ [
+ "docker",
+ "build",
+ "-t",
+ container_name,
+ "-f",
+ os.path.join(ddir, dockerfile_name),
+ ".",
+ ]
+ )
+ p.wait()
+ fail_if(p.returncode != 0, "docker build {} failed".format(container_name))
+
+
+def get_container_version_if_not_specified():
+ if FLAGS.container_version is None:
+ # Read from TRITON_VERSION file in server repo to determine version
+ with open("TRITON_VERSION", "r") as vfile:
+ version = vfile.readline().strip()
+ import build
+
+ _, FLAGS.container_version = build.container_versions(
+ version, None, FLAGS.container_version
+ )
+ log("version {}".format(version))
+ log("using container version {}".format(FLAGS.container_version))
+
+
+def create_argmap(images, skip_pull):
+ # Extract information from upstream build and create map other functions can
+ # use
+ full_docker_image = images["full"]
+ min_docker_image = images["min"]
+ enable_gpu = FLAGS.enable_gpu
+ # Docker inspect environment variables
+ base_run_args = ["docker", "inspect", "-f"]
+    import re  # used to parse environment variables reported by docker inspect
+
+ # first pull docker images
+ if not skip_pull:
+ log("pulling container:{}".format(full_docker_image))
+ p = subprocess.run(["docker", "pull", full_docker_image])
+ fail_if(
+ p.returncode != 0,
+ "docker pull container {} failed, {}".format(full_docker_image, p.stderr),
+ )
+ if enable_gpu:
+ if not skip_pull:
+ pm = subprocess.run(["docker", "pull", min_docker_image])
+ fail_if(
+ pm.returncode != 0 and not skip_pull,
+ "docker pull container {} failed, {}".format(
+ min_docker_image, pm.stderr
+ ),
+ )
+ pm_path = subprocess.run(
+ base_run_args
+ + [
+ "{{range $index, $value := .Config.Env}}{{$value}} {{end}}",
+ min_docker_image,
+ ],
+ capture_output=True,
+ text=True,
+ )
+ fail_if(
+ pm_path.returncode != 0,
+ "docker inspect to find triton environment variables for min container failed, {}".format(
+ pm_path.stderr
+ ),
+ )
+        # The min container must have GPU support enabled if this is a GPU build
+        vars = pm_path.stdout
+        e = re.search("CUDA_VERSION", vars)
+        gpu_enabled = e is not None
+ fail_if(
+ not gpu_enabled,
+ "Composing container with gpu support enabled but min container provided does not have CUDA installed",
+ )
+
+ # Check full container environment variables
+ p_path = subprocess.run(
+ base_run_args
+ + [
+ "{{range $index, $value := .Config.Env}}{{$value}} {{end}}",
+ full_docker_image,
+ ],
+ capture_output=True,
+ text=True,
+ )
+ fail_if(
+ p_path.returncode != 0,
+ "docker inspect to find environment variables for full container failed, {}".format(
+ p_path.stderr
+ ),
+ )
+ vars = p_path.stdout
+ log_verbose("inspect args: {}".format(vars))
+
+    e0 = re.search(r"TRITON_SERVER_GPU_ENABLED=([\S]{1,}) ", vars)
+    e1 = re.search("CUDA_VERSION", vars)
+    gpu_enabled = False
+    if e0 is not None:
+        gpu_enabled = e0.group(1) == "1"
+    elif e1 is not None:
+        gpu_enabled = True
+ fail_if(
+ gpu_enabled != enable_gpu,
+        "Error: full container provided was built with "
+        "'TRITON_SERVER_GPU_ENABLED' as {} and you are composing container "
+        "with 'TRITON_SERVER_GPU_ENABLED' as {}".format(gpu_enabled, enable_gpu),
+ )
+    e = re.search(r"TRITON_SERVER_VERSION=([\S]{6,}) ", vars)
+ version = "" if e is None else e.group(1)
+ fail_if(
+ len(version) == 0,
+ "docker inspect to find triton server version failed, {}".format(p_path.stderr),
+ )
+    e = re.search(r"NVIDIA_TRITON_SERVER_VERSION=([\S]{5,}) ", vars)
+ container_version = "" if e is None else e.group(1)
+ fail_if(
+ len(container_version) == 0,
+ "docker inspect to find triton container version failed, {}".format(vars),
+ )
+    dcgm_ver = re.search(r"DCGM_VERSION=([\S]{4,}) ", vars)
+ dcgm_version = ""
+ if dcgm_ver is None:
+ dcgm_version = "2.2.3"
+ log(
+            "WARNING: DCGM version not found from image, installing the earliest version {}".format(
+ dcgm_version
+ )
+ )
+ else:
+ dcgm_version = dcgm_ver.group(1)
+ fail_if(
+ len(dcgm_version) == 0,
+ "docker inspect to find DCGM version failed, {}".format(vars),
+ )
+
+ p_sha = subprocess.run(
+ base_run_args
+ + ['{{ index .Config.Labels "com.nvidia.build.ref"}}', full_docker_image],
+ capture_output=True,
+ text=True,
+ )
+ fail_if(
+ p_sha.returncode != 0,
+ "docker inspect of upstream docker image build sha failed, {}".format(
+ p_sha.stderr
+ ),
+ )
+ p_build = subprocess.run(
+ base_run_args
+ + ['{{ index .Config.Labels "com.nvidia.build.id"}}', full_docker_image],
+ capture_output=True,
+ text=True,
+ )
+ fail_if(
+ p_build.returncode != 0,
+ "docker inspect of upstream docker image build sha failed, {}".format(
+ p_build.stderr
+ ),
+ )
+
+ p_find = subprocess.run(
+ ["docker", "run", full_docker_image, "bash", "-c", "ls /usr/bin/"],
+ capture_output=True,
+ text=True,
+ )
+ f = re.search("serve", p_find.stdout)
+ fail_if(
+ p_find.returncode != 0,
+ "Cannot search for 'serve' in /usr/bin, {}".format(p_find.stderr),
+ )
+ argmap = {
+ "NVIDIA_BUILD_REF": p_sha.stdout.rstrip(),
+ "NVIDIA_BUILD_ID": p_build.stdout.rstrip(),
+ "TRITON_VERSION": version,
+ "TRITON_CONTAINER_VERSION": container_version,
+ "DCGM_VERSION": dcgm_version,
+ "SAGEMAKER_ENDPOINT": f is not None,
+ }
+ return argmap
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ group_qv = parser.add_mutually_exclusive_group()
+ group_qv.add_argument(
+ "-q",
+ "--quiet",
+ action="store_true",
+ required=False,
+ help="Disable console output.",
+ )
+ group_qv.add_argument(
+ "-v",
+ "--verbose",
+ action="store_true",
+ required=False,
+ help="Enable verbose output.",
+ )
+ parser.add_argument(
+ "--output-name",
+ type=str,
+ required=False,
+ help='Name for the generated Docker image. Default is "tritonserver".',
+ )
+ parser.add_argument(
+ "--work-dir",
+ type=str,
+ required=False,
+ help="Generated dockerfiles are placed here. Default to current directory.",
+ )
+ parser.add_argument(
+ "--container-version",
+ type=str,
+ required=False,
+ help="The version to use for the generated Docker image. If not specified "
+ "the container version will be chosen automatically based on the "
+ "repository branch.",
+ )
+ parser.add_argument(
+ "--image",
+ action="append",
+ required=False,
+        help="Use specified Docker image to generate Docker image. Specified as "
+        '<image-name>,<image>. <image-name> can be "min", "gpu-min" '
+        'or "full". Both "min" and "full" need to be specified at the same time. '
+        'This will override "--container-version". "gpu-min" is needed for '
+        "CPU-only container to copy TensorFlow and PyTorch deps.",
+ )
+ parser.add_argument(
+ "--enable-gpu",
+ nargs="?",
+ type=lambda x: (str(x).lower() == "true"),
+ const=True,
+ default=True,
+ required=False,
+ help=argparse.SUPPRESS,
+ )
+ parser.add_argument(
+ "--backend",
+ action="append",
+ required=False,
+        help="Include <backend-name> in the generated Docker image. The flag may be "
+ "specified multiple times.",
+ )
+ parser.add_argument(
+ "--repoagent",
+ action="append",
+ required=False,
+        help="Include <repoagent-name> in the generated Docker image. The flag may "
+ "be specified multiple times.",
+ )
+ parser.add_argument(
+ "--cache",
+ action="append",
+ required=False,
+        help="Include <cache-name> in the generated Docker image. The flag may "
+ "be specified multiple times.",
+ )
+ parser.add_argument(
+ "--skip-pull",
+ action="store_true",
+ required=False,
+ help="Do not pull the required docker images. The user is responsible "
+ "for pulling the upstream images needed to compose the image.",
+ )
+ parser.add_argument(
+ "--dry-run",
+ action="store_true",
+ required=False,
+ help="Only creates Dockerfile.compose, does not build the Docker image.",
+ )
+
+ FLAGS = parser.parse_args()
+
+ if FLAGS.work_dir is None:
+ FLAGS.work_dir = "."
+ if FLAGS.output_name is None:
+ FLAGS.output_name = "tritonserver"
+
+ dockerfile_name = "Dockerfile.compose"
+
+ if FLAGS.backend is None:
+ FLAGS.backend = []
+ if FLAGS.repoagent is None:
+ FLAGS.repoagent = []
+ if FLAGS.cache is None:
+ FLAGS.cache = []
+
+ # Initialize map of docker images.
+ images = {}
+ if FLAGS.image:
+ for img in FLAGS.image:
+ parts = img.split(",")
+ fail_if(
+ len(parts) != 2,
+                "--image must specify <image-name>,<image>",
+ )
+ fail_if(
+ parts[0] not in ["min", "full", "gpu-min"],
+ "unsupported image-name '{}' for --image".format(parts[0]),
+ )
+ log('image "{}": "{}"'.format(parts[0], parts[1]))
+ images[parts[0]] = parts[1]
+ else:
+ get_container_version_if_not_specified()
+ if FLAGS.enable_gpu:
+ images = {
+ "full": "nvcr.io/nvidia/tritonserver:{}-py3".format(
+ FLAGS.container_version
+ ),
+ "min": "nvcr.io/nvidia/tritonserver:{}-py3-min".format(
+ FLAGS.container_version
+ ),
+ }
+ else:
+ images = {
+ "full": "nvcr.io/nvidia/tritonserver:{}-cpu-only-py3".format(
+ FLAGS.container_version
+ ),
+ "min": "ubuntu:22.04",
+ }
+    fail_if(len(images) < 2, "Need to specify both 'full' and 'min' images if any are given")
+
+ # For CPU-only image we need to copy some cuda libraries and dependencies
+ # since we are using PyTorch, TensorFlow 1, TensorFlow 2 containers that
+ # are not CPU-only.
+ if (
+ ("pytorch" in FLAGS.backend)
+ or ("tensorflow" in FLAGS.backend)
+ or ("tensorflow2" in FLAGS.backend)
+ ) and ("gpu-min" not in images):
+ images["gpu-min"] = "nvcr.io/nvidia/tritonserver:{}-py3-min".format(
+ FLAGS.container_version
+ )
+
+ argmap = create_argmap(images, FLAGS.skip_pull)
+
+ start_dockerfile(FLAGS.work_dir, images, argmap, dockerfile_name, FLAGS.backend)
+ add_requested_backends(FLAGS.work_dir, dockerfile_name, FLAGS.backend)
+ add_requested_repoagents(FLAGS.work_dir, dockerfile_name, FLAGS.repoagent)
+ add_requested_caches(FLAGS.work_dir, dockerfile_name, FLAGS.cache)
+ end_dockerfile(FLAGS.work_dir, dockerfile_name, argmap)
+
+ if not FLAGS.dry_run:
+ build_docker_image(FLAGS.work_dir, dockerfile_name, FLAGS.output_name)
diff --git a/deploy/alibaba-cloud/README.md b/deploy/alibaba-cloud/README.md
new file mode 100644
index 0000000000..98f914a693
--- /dev/null
+++ b/deploy/alibaba-cloud/README.md
@@ -0,0 +1,180 @@
+
+
+# Deploy Triton Inference Server on PAI-EAS
+* Table Of Contents
+ - [Description](https://yuque.alibaba-inc.com/pai/blade/mtptqc#Description)
+ - [Prerequisites](https://yuque.alibaba-inc.com/pai/blade/mtptqc#Prerequisites)
+ - [Demo Instruction](https://yuque.alibaba-inc.com/pai/blade/mtptqc#31bb94ef)
+ - [Additional Resources](https://yuque.alibaba-inc.com/pai/blade/mtptqc#89d5e680)
+ - [Known Issues](https://yuque.alibaba-inc.com/pai/blade/mtptqc#558ab0be)
+
+# Description
+This repository contains information about how to deploy NVIDIA Triton Inference Server in EAS (Elastic Algorithm Service) of Alibaba Cloud.
+- EAS provides a simple way for deep learning developers to deploy their models in Alibaba Cloud.
+- Using the **Triton Processor** is the recommended way to deploy Triton Inference Server on EAS. Users can deploy a Triton Server simply by preparing their models and creating an EAS service with the processor type set to `triton`.
+- Models should be uploaded to Alibaba Cloud's OSS (Object Storage Service). The user's model repository in OSS will be mounted onto a local path visible to Triton Server.
+- This documentation uses Triton's own example models for the demo. The TensorFlow Inception model can be downloaded by the `fetch_models.sh` script.
+
+# Prerequisites
+- You should register an Alibaba Cloud account and be able to use EAS via [eascmd](https://help.aliyun.com/document_detail/111031.html?spm=a2c4g.11186623.6.752.42356f46FN5fU1), a command line tool to create, stop, or scale services on EAS.
+- Before creating an EAS service, you should buy dedicated resource groups (CPU or GPU) on EAS following this [document](https://www.alibabacloud.com/help/doc-detail/120122.htm).
+- Make sure you can use OSS (Object Storage Service); the models should be uploaded into your own OSS bucket.
+
+# Demo Instruction
+## Prepare a model repo directory in OSS
+Download the TensorFlow Inception model via [fetch_model.sh](https://github.com/triton-inference-server/server/blob/main/docs/examples/fetch_models.sh). Then use [ossutil](https://help.aliyun.com/document_detail/50452.html?spm=a2c4g.11186623.6.833.26d66d51dPEytI), a command line tool for OSS, to upload the model to an OSS directory of your choice.
+
+```
+./ossutil cp inception_graphdef/ oss://triton-model-repo/models
+```
+## Create Triton Service with JSON config by eascmd
+The following is the JSON we use when creating a Triton Server on EAS.
+```
+{
+ "name": "",
+ "processor": "triton",
+ "processor_params": [
+ "--model-repository=oss://triton-model-repo/models",
+ "--allow-grpc=true",
+ "--allow-http=true"
+ ],
+ "metadata": {
+ "instance": 1,
+ "cpu": 4,
+ "gpu": 1,
+ "memory": 10000,
+ "resource": "",
+ "rpc.keepalive": 3000
+ }
+}
+```
+Only `processor` and `processor_params` need to differ from a normal EAS service.
+|params|details|
+|--------|-------|
+|processor|Name should be **triton** to use Triton on EAS|
+|processor_params|List of strings, every element is a param for tritonserver |
+
+```
+./eascmd create triton.config
+[RequestId]: AECDB6A4-CB69-4688-AA35-BA1E020C39E6
++-------------------+------------------------------------------------------------------------------------------------+
+| Internet Endpoint | http://1271520832287160.cn-shanghai.pai-eas.aliyuncs.com/api/predict/test_triton_processor |
+| Intranet Endpoint | http://1271520832287160.vpc.cn-shanghai.pai-eas.aliyuncs.com/api/predict/test_triton_processor |
+| Token | MmY3M2ExZGYwYjZiMTQ5YTRmZWE3MDAzNWM1ZTBiOWQ3MGYxZGNkZQ== |
++-------------------+------------------------------------------------------------------------------------------------+
+[OK] Service is now deploying
+[OK] Successfully synchronized resources
+[OK] Waiting [Total: 1, Pending: 1, Running: 0]
+[OK] Waiting [Total: 1, Pending: 1, Running: 0]
+[OK] Running [Total: 1, Pending: 0, Running: 1]
+[OK] Service is running
+```
+## Query Triton service by python client
+### Install triton's python client
+```
+pip install tritonclient[all]
+```
+### A demo to query inception model
+```
+import numpy as np
+import time
+from PIL import Image
+
+import tritonclient.http as httpclient
+from tritonclient.utils import InferenceServerException
+
+URL = ""
+HEADERS = {"Authorization": ""}
+input_img = httpclient.InferInput("input", [1, 299, 299, 3], "FP32")
+# Use one of the cat images from ImageNet or any cat image you like
+img = Image.open('./cat.png').resize((299, 299))
+img = np.asarray(img).astype('float32') / 255.0
+input_img.set_data_from_numpy(img.reshape([1, 299, 299, 3]), binary_data=True)
+
+output = httpclient.InferRequestedOutput(
+ "InceptionV3/Predictions/Softmax", binary_data=True
+)
+triton_client = httpclient.InferenceServerClient(url=URL, verbose=False)
+
+start = time.time()
+for i in range(10):
+ results = triton_client.infer(
+ "inception_graphdef", inputs=[input_img], outputs=[output], headers=HEADERS
+ )
+ res_body = results.get_response()
+ elapsed_ms = (time.time() - start) * 1000
+ if i == 0:
+ print("model name: ", res_body["model_name"])
+ print("model version: ", res_body["model_version"])
+ print("output name: ", res_body["outputs"][0]["name"])
+ print("output shape: ", res_body["outputs"][0]["shape"])
+ print("[{}] Avg rt(ms): {:.2f}".format(i, elapsed_ms))
+ start = time.time()
+```
+You will get a result like the following by running the Python script:
+```
+[0] Avg rt(ms): 86.05
+[1] Avg rt(ms): 52.35
+[2] Avg rt(ms): 50.56
+[3] Avg rt(ms): 43.45
+[4] Avg rt(ms): 41.19
+[5] Avg rt(ms): 40.55
+[6] Avg rt(ms): 37.24
+[7] Avg rt(ms): 37.16
+[8] Avg rt(ms): 36.68
+[9] Avg rt(ms): 34.24
+[10] Avg rt(ms): 34.27
+```
+# Additional Resources
+See the following resources to learn more about how to use Alibaba Cloud's OSS or EAS.
+- [Alibaba Cloud OSS's Document](https://help.aliyun.com/product/31815.html?spm=a2c4g.11186623.6.540.3c0f62e7q3jw8b)
+
+
+# Known Issues
+- [Binary Tensor Data Extension](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_binary_data.md) is not fully supported yet. For users who want a service with the binary extension, it is currently only available in the cn-shanghai region of PAI-EAS.
+- Currently only HTTP/1 is supported, hence gRPC cannot be used when querying Triton servers on EAS. HTTP/2 will be officially supported in a short time.
+- Users should not mount a whole OSS bucket when launching the Triton processor, but rather an arbitrarily deep sub-directory of the bucket. Otherwise the mounted path will not be as expected.
+- Not all Triton Server parameters are supported on EAS; only the following parameters are supported (see the sketch after this list for how they can be passed via `processor_params`):
+```
+model-repository
+log-verbose
+log-info
+log-warning
+log-error
+exit-on-error
+strict-model-config
+strict-readiness
+allow-http
+http-thread-count
+pinned-memory-pool-byte-size
+cuda-memory-pool-byte-size
+min-supported-compute-capability
+buffer-manager-thread-count
+backend-config
+```
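+
+As a quick illustration of how these parameters are passed through `processor_params`, the sketch below assembles a service config in Python. It is only a sketch: the service name, OSS path, and resource group are placeholder values, and the chosen parameters are simply examples taken from the list above.
+
+```
+import json
+
+# Placeholder values -- substitute your own service name, OSS model
+# repository path, and EAS resource group before use.
+service = {
+    "name": "test_triton_processor",
+    "processor": "triton",
+    "processor_params": [
+        # Each entry is a tritonserver argument from the supported list above.
+        "--model-repository=oss://triton-model-repo/models",
+        "--strict-model-config=false",
+        "--http-thread-count=8",
+        "--allow-http=true",
+    ],
+    "metadata": {
+        "instance": 1,
+        "cpu": 4,
+        "gpu": 1,
+        "memory": 10000,
+        "resource": "",
+        "rpc.keepalive": 3000,
+    },
+}
+
+# Write the config file, then create the service with: ./eascmd create triton.config
+with open("triton.config", "w") as f:
+    json.dump(service, f, indent=2)
+```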
diff --git a/deploy/aws/Chart.yaml b/deploy/aws/Chart.yaml
new file mode 100644
index 0000000000..2b7541bee6
--- /dev/null
+++ b/deploy/aws/Chart.yaml
@@ -0,0 +1,31 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+appVersion: "1.0"
+description: Triton Inference Server
+name: triton-inference-server
+version: 1.0.0
diff --git a/deploy/aws/README.md b/deploy/aws/README.md
new file mode 100644
index 0000000000..4e60fdd65b
--- /dev/null
+++ b/deploy/aws/README.md
@@ -0,0 +1,262 @@
+
+
+[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
+
+# Kubernetes Deploy: Triton Inference Server Cluster
+
+A helm chart for installing a single cluster of Triton Inference
+Server is provided. By default the cluster contains a single instance
+of the inference server but the *replicaCount* configuration parameter
+can be set to create a cluster of any size, as described below.
+
+This guide assumes you already have a functional Kubernetes cluster
+and helm installed (see below for instructions on installing
+helm). Note the following requirements:
+
+* The helm chart deploys Prometheus and Grafana to collect and display Triton metrics. To use this helm chart you must install Prometheus and Grafana in your cluster as described below, and your cluster must contain sufficient CPU resources to support these services.
+
+* If you want Triton Server to use GPUs for inferencing, your cluster
+must be configured to contain the desired number of GPU nodes (EC2 G4 instances recommended)
+with support for the NVIDIA driver and CUDA version required by the version
+of the inference server you are using.
+
+The steps below describe how to set up a model repository, use helm to
+launch the inference server, and then send inference requests to the
+running server. You can access a Grafana endpoint to see real-time
+metrics reported by the inference server.
+
+## Installing Helm
+
+### Helm v3
+
+If you do not already have Helm installed in your Kubernetes cluster,
+executing the following steps from the [official helm install
+guide](https://helm.sh/docs/intro/install/) will
+give you a quick setup.
+
+If you're currently using Helm v2 and would like to migrate to Helm v3,
+please see the [official migration guide](https://helm.sh/docs/topics/v2_v3_migration/).
+
+### Helm v2
+
+> **NOTE**: Moving forward this chart will only be tested and maintained for Helm v3.
+
+Below are example instructions for installing Helm v2.
+
+```
+$ curl https://raw.githubusercontent.com/helm/helm/master/scripts/get | bash
+$ kubectl create serviceaccount -n kube-system tiller
+serviceaccount/tiller created
+$ kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
+$ helm init --service-account tiller --wait
+```
+
+If you run into any issues, you can refer to the official installation guide [here](https://v2.helm.sh/docs/install/).
+
+## Model Repository
+
+If you already have a model repository you may use that with this helm
+chart. If you do not have a model repository, you can check out a local
+copy of the inference server source repository to create an example
+model repository:
+
+```
+$ git clone https://github.com/triton-inference-server/server.git
+```
+
+Triton Server needs a repository of models that it will make available
+for inferencing. For this example you will place the model repository
+in an AWS S3 Storage bucket.
+
+```
+$ aws s3 mb s3://triton-inference-server-repository
+```
+
+Following the [QuickStart](../../docs/getting_started/quickstart.md) download the
+example model repository to your system and copy it into the AWS S3
+bucket.
+
+```
+$ aws s3 cp --recursive docs/examples/model_repository s3://triton-inference-server-repository/model_repository
+```
+
+### AWS Model Repository
+To load the model from AWS S3, you need to convert the following AWS credentials to base64 format and add them to the values.yaml (a Python equivalent is sketched after these commands).
+
+```
+echo -n 'REGION' | base64
+```
+```
+echo -n 'SECRET_KEY_ID' | base64
+```
+```
+echo -n 'SECRET_ACCESS_KEY' | base64
+```
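+
+If you prefer to do the conversion in one step, the minimal Python sketch below produces the same base64 strings for pasting into values.yaml. The key names and values are placeholders mirroring the shell commands above; substitute your real region and credentials.
+
+```
+import base64
+
+# Placeholder credentials -- replace with your actual AWS values.
+credentials = {
+    "REGION": "us-west-2",
+    "SECRET_KEY_ID": "REPLACE_WITH_KEY_ID",
+    "SECRET_ACCESS_KEY": "REPLACE_WITH_SECRET_ACCESS_KEY",
+}
+
+for name, value in credentials.items():
+    # Equivalent of: echo -n '<value>' | base64
+    print(name, base64.b64encode(value.encode("utf-8")).decode("utf-8"))
+```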
+
+## Deploy Prometheus and Grafana
+
+The inference server metrics are collected by Prometheus and viewable
+by Grafana. The inference server helm chart assumes that Prometheus
+and Grafana are available so this step must be followed even if you
+don't want to use Grafana.
+
+Use the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) to install these components. The
+*serviceMonitorSelectorNilUsesHelmValues* flag is needed so that
+Prometheus can find the inference server metrics in the *example*
+release deployed below.
+
+```
+$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false prometheus-community/kube-prometheus-stack
+```
+
+Then port-forward to the Grafana service so you can access it from
+your local browser.
+
+```
+$ kubectl port-forward service/example-metrics-grafana 8080:80
+```
+
+Now you should be able to navigate in your browser to localhost:8080
+and see the Grafana login page. Use username=admin and
+password=prom-operator to login.
+
+An example Grafana dashboard is available in dashboard.json. Use the
+import function in Grafana to import and view this dashboard.
+
+## Deploy the Inference Server
+
+Deploy the inference server using the default configuration with the
+following commands.
+
+```
+$ cd
+$ helm install example .
+```
+
+Use kubectl to see status and wait until the inference server pods are
+running.
+
+```
+$ kubectl get pods
+NAME READY STATUS RESTARTS AGE
+example-triton-inference-server-5f74b55885-n6lt7 1/1 Running 0 2m21s
+```
+
+There are several ways of overriding the default configuration as
+described in this [helm
+documentation](https://helm.sh/docs/using_helm/#customizing-the-chart-before-installing).
+
+You can edit the values.yaml file directly or you can use the *--set*
+option to override a single parameter with the CLI. For example, to
+deploy a cluster of four inference servers use *--set* to set the
+replicaCount parameter.
+
+```
+$ helm install example --set replicaCount=4 .
+```
+
+You can also write your own "config.yaml" file with the values you
+want to override and pass it to helm.
+
+```
+$ cat << EOF > config.yaml
+namespace: MyCustomNamespace
+image:
+ imageName: nvcr.io/nvidia/tritonserver:custom-tag
+  modelRepositoryPath: s3://my_model_repository
+EOF
+$ helm install example -f config.yaml .
+```
+
+## Using Triton Inference Server
+
+Now that the inference server is running you can send HTTP or GRPC
+requests to it to perform inferencing. By default, the inferencing
+service is exposed with a LoadBalancer service type. Use the following
+to find the external IP for the inference server. In this case it is
+34.83.9.133.
+
+```
+$ kubectl get services
+NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+...
+example-triton-inference-server LoadBalancer 10.18.13.28 34.83.9.133 8000:30249/TCP,8001:30068/TCP,8002:32723/TCP 47m
+```
+
+The inference server exposes an HTTP endpoint on port 8000, a GRPC
+endpoint on port 8001, and a Prometheus metrics endpoint on
+port 8002. You can use curl to get the meta-data of the inference server
+from the HTTP endpoint.
+
+```
+$ curl 34.83.9.133:8000/v2
+```
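+
+If you would rather use Python than curl, the short sketch below queries the same HTTP endpoint with the `tritonclient` package (`pip install tritonclient[all]`). The IP address is the example external IP from above and will be different in your cluster.
+
+```
+import tritonclient.http as httpclient
+
+# Replace with the EXTERNAL-IP reported by `kubectl get services`.
+client = httpclient.InferenceServerClient(url="34.83.9.133:8000")
+
+# Liveness and readiness checks against the HTTP endpoint on port 8000.
+print("live: ", client.is_server_live())
+print("ready:", client.is_server_ready())
+
+# Equivalent of `curl 34.83.9.133:8000/v2` -- server metadata.
+print(client.get_server_metadata())
+```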
+
+Follow the [QuickStart](../../docs/getting_started/quickstart.md) to get the example
+image classification client that can be used to perform inferencing
+using image classification models being served by the inference
+server. For example,
+
+```
+$ image_client -u 34.83.9.133:8000 -m inception_graphdef -s INCEPTION -c3 mug.jpg
+Request 0, batch size 1
+Image 'images/mug.jpg':
+ 504 (COFFEE MUG) = 0.723992
+ 968 (CUP) = 0.270953
+ 967 (ESPRESSO) = 0.00115997
+```
+
+## Cleanup
+
+Once you've finished using the inference server, you should use helm to
+delete the deployment.
+
+```
+$ helm list
+NAME REVISION UPDATED STATUS CHART APP VERSION NAMESPACE
+example 1 Wed Feb 27 22:16:55 2019 DEPLOYED triton-inference-server-1.0.0 1.0 default
+example-metrics 1 Tue Jan 21 12:24:07 2020 DEPLOYED prometheus-operator-6.18.0 0.32.0 default
+
+$ helm uninstall example
+$ helm uninstall example-metrics
+```
+
+For the Prometheus and Grafana services, you should [explicitly delete
+CRDs](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack#uninstall-helm-chart):
+
+```
+$ kubectl delete crd alertmanagerconfigs.monitoring.coreos.com alertmanagers.monitoring.coreos.com podmonitors.monitoring.coreos.com probes.monitoring.coreos.com prometheuses.monitoring.coreos.com prometheusrules.monitoring.coreos.com servicemonitors.monitoring.coreos.com thanosrulers.monitoring.coreos.com
+```
+
+You may also want to delete the AWS bucket you created to hold the
+model repository.
+
+```
+$ aws s3 rb s3://triton-inference-server-repository --force
+```
diff --git a/deploy/aws/dashboard.json b/deploy/aws/dashboard.json
new file mode 100644
index 0000000000..8960b41d35
--- /dev/null
+++ b/deploy/aws/dashboard.json
@@ -0,0 +1,411 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.3.5"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "heatmap",
+ "name": "Heatmap",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "nv_inference_request_success",
+ "legendFormat": "Success {{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "nv_inference_request_failure",
+ "legendFormat": "Failure {{instance}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Cumulative Inference Requests",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateReds",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 7,
+ "legend": {
+ "show": false
+ },
+ "options": {},
+ "reverseYBuckets": false,
+ "targets": [
+ {
+ "expr": "sum(increase(nv_inference_load_ratio_bucket[1m])) by (le)",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Load Ratio (Total Time / Compute Time)",
+ "tooltip": {
+ "show": true,
+ "showHistogram": false
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "auto",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 9
+ },
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(nv_inference_queue_duration_us[30s]) / 1000",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Queue Time (milliseconds)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Queue Time (ms)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 9
+ },
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(nv_inference_compute_duration_us[30s]) / 1000",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Compute Time (milliseconds)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Compute Time (ms)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "5s",
+ "schemaVersion": 19,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ]
+ },
+ "timezone": "",
+ "title": "Triton Inference Server",
+ "uid": "slEY4dsZk",
+ "version": 8
+}
diff --git a/deploy/aws/templates/_helpers.tpl b/deploy/aws/templates/_helpers.tpl
new file mode 100644
index 0000000000..6dba910012
--- /dev/null
+++ b/deploy/aws/templates/_helpers.tpl
@@ -0,0 +1,92 @@
+{{/*
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/}}
+
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Create inference server name.
+*/}}
+{{- define "triton-inference-server.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "triton-inference-server.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+ Create inference server metrics service name and fullname derived from above and
+ truncated appropriately.
+*/}}
+{{- define "triton-inference-server-metrics.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-metrics.fullname" -}}
+{{- $basename := include "triton-inference-server.fullname" . -}}
+{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics" -}}
+{{- end -}}
+
+{{/*
+ Create inference server metrics monitor name and fullname derived from
+ above and truncated appropriately.
+*/}}
+{{- define "triton-inference-server-metrics-monitor.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-metrics-monitor.fullname" -}}
+{{- $basename := include "triton-inference-server.fullname" . -}}
+{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "triton-inference-server.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/deploy/aws/templates/deployment.yaml b/deploy/aws/templates/deployment.yaml
new file mode 100644
index 0000000000..d90e51b113
--- /dev/null
+++ b/deploy/aws/templates/deployment.yaml
@@ -0,0 +1,100 @@
+# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ template "triton-inference-server.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ replicas: {{ .Values.replicaCount }}
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+ template:
+ metadata:
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+
+ spec:
+ containers:
+ - name: {{ .Chart.Name }}
+ image: "{{ .Values.image.imageName }}"
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+
+ resources:
+ limits:
+ nvidia.com/gpu: {{ .Values.image.numGpus }}
+
+ args: ["tritonserver", "--model-store={{ .Values.image.modelRepositoryPath }}",
+ "--model-control-mode=poll",
+ "--repository-poll-secs=5"]
+
+ env:
+ - name: AWS_DEFAULT_REGION
+ valueFrom:
+ secretKeyRef:
+ name: aws-credentials
+ key: AWS_DEFAULT_REGION
+ - name: AWS_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: aws-credentials
+ key: AWS_ACCESS_KEY_ID
+ - name: AWS_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: aws-credentials
+ key: AWS_SECRET_ACCESS_KEY
+
+ ports:
+ - containerPort: 8000
+ name: http
+ - containerPort: 8001
+ name: grpc
+ - containerPort: 8002
+ name: metrics
+ livenessProbe:
+ httpGet:
+ path: /v2/health/live
+ port: http
+ readinessProbe:
+ initialDelaySeconds: 5
+ periodSeconds: 5
+ httpGet:
+ path: /v2/health/ready
+ port: http
+
+ securityContext:
+ runAsUser: 1000
+ fsGroup: 1000
diff --git a/deploy/aws/templates/secrets.yaml b/deploy/aws/templates/secrets.yaml
new file mode 100644
index 0000000000..d113214ee0
--- /dev/null
+++ b/deploy/aws/templates/secrets.yaml
@@ -0,0 +1,35 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+kind: Secret
+metadata:
+ name: aws-credentials
+type: Opaque
+data:
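+  # NOTE: Kubernetes Secret `data` values must be base64-encoded, so the
+  # values supplied in values.yaml are expected to already be in base64.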
+ AWS_DEFAULT_REGION: {{ .Values.secret.region }}
+ AWS_ACCESS_KEY_ID: {{ .Values.secret.id }}
+ AWS_SECRET_ACCESS_KEY: {{ .Values.secret.key }}
diff --git a/deploy/aws/templates/service.yaml b/deploy/aws/templates/service.yaml
new file mode 100644
index 0000000000..3315fd77db
--- /dev/null
+++ b/deploy/aws/templates/service.yaml
@@ -0,0 +1,91 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ type: {{ .Values.service.type }}
+ ports:
+ - port: 8000
+ targetPort: http
+ name: http-inference-server
+ - port: 8001
+ targetPort: grpc
+ name: grpc-inference-server
+ - port: 8002
+ targetPort: metrics
+ name: metrics-inference-server
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server-metrics.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server-metrics.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ annotations:
+ alpha.monitoring.coreos.com/non-namespaced: "true"
+spec:
+ ports:
+ - name: metrics
+ port: 8080
+ targetPort: metrics
+ protocol: TCP
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ name: {{ template "triton-inference-server-metrics-monitor.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server-metrics-monitor.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server-metrics.name" . }}
+ endpoints:
+ - port: metrics
+ interval: 15s
diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
new file mode 100644
index 0000000000..e915da138b
--- /dev/null
+++ b/deploy/aws/values.yaml
@@ -0,0 +1,41 @@
+# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+replicaCount: 1
+
+image:
+ imageName: nvcr.io/nvidia/tritonserver:24.03-py3
+ pullPolicy: IfNotPresent
+ modelRepositoryPath: s3://triton-inference-server-repository/model_repository
+ numGpus: 1
+
+service:
+ type: LoadBalancer
+
+secret:
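+  # update the following with base64 encoded parameters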
+ region: AWS_REGION
+ id: AWS_SECRET_KEY_ID
+ key: AWS_SECRET_ACCESS_KEY
\ No newline at end of file
diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
new file mode 100644
index 0000000000..b7acfe729c
--- /dev/null
+++ b/deploy/fleetcommand/Chart.yaml
@@ -0,0 +1,38 @@
+# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+# appVersion is the Triton version; update when changing release
+appVersion: "2.44.0"
+description: Triton Inference Server (Fleet Command)
+name: triton-inference-server
+# version is the Chart version; update when changing anything in the chart
+# This follows semantic versioning, i.e.:
+# Given version X.Y.Z
+# When making fixes to the chart, increment Z
+# When making functional changes to the chart (including updating the Triton version, above), increment Y and reset Z to 0
+# When making breaking changes to the chart (e.g. user must take action before deploying), increment X and reset Y and Z to 0
+version: 1.4.0
diff --git a/deploy/fleetcommand/README.md b/deploy/fleetcommand/README.md
new file mode 100644
index 0000000000..217162279c
--- /dev/null
+++ b/deploy/fleetcommand/README.md
@@ -0,0 +1,150 @@
+
+
+[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
+
+# Fleet Command Deploy: NVIDIA Triton Inference Server
+
+A helm chart for installing a single cluster of NVIDIA Triton Inference Server
+on Fleet Command is provided. By default the cluster contains a single instance
+of Triton, but the *replicaCount* configuration parameter can be set to create
+a cluster of any size, as described below.
+
+This guide assumes you already have a functional Fleet Command location
+deployed. Please refer to the [Fleet Command
+Documentation](https://docs.nvidia.com/fleet-command/prod_fleet-command/prod_fleet-command/overview.html)
+for more information.
+
+The steps below describe how to set up a model repository, use helm to launch
+Triton, and then send inference requests to the running Triton Inference
+Server. You can optionally scrape metrics with Prometheus and access a Grafana
+endpoint to see real-time metrics reported by Triton.
+
+## Model Repository
+
+If you already have a model repository you may use that with this helm chart.
+If you do not have a model repository, you can check out a local copy of the
+Triton Inference Server source repository to create an example model repository:
+
+```
+$ git clone https://github.com/triton-inference-server/server.git
+```
+
+Triton needs a repository of models that it will make available for inferencing.
+For this example you will place the model repository in an S3 storage bucket
+(either in AWS or another S3 API-compatible on-premises object store).
+
+```
+$ aws s3 mb s3://triton-inference-server-repository
+```
+
+Following the [QuickStart](../../docs/getting_started/quickstart.md), download the example model
+repository to your system and copy it into the AWS S3 bucket.
+
+```
+$ aws s3 cp --recursive docs/examples/model_repository s3://triton-inference-server-repository/model_repository
+```
+
+### AWS Model Repository
+
+To load models from AWS S3, you need to convert the following AWS credentials
+to base64 format and add them to the Application Configuration section when
+creating the Fleet Command Deployment.
+
+```
+echo -n 'REGION' | base64
+echo -n 'SECRET_KEY_ID' | base64
+echo -n 'SECRET_ACCESS_KEY' | base64
+# Optional for using session token
+echo -n 'AWS_SESSION_TOKEN' | base64
+```
+
+## Deploy the Triton Inference Server
+
+Deploy the Triton Inference Server to your Location in Fleet Command by creating
+a Deployment. You can specify configuration parameters to override the default
+[values.yaml](values.yaml) in the Application Configuration section.
+
+*Note:* You _must_ provide a `--model-repository` parameter with a path to your
+prepared model repository in your S3 bucket. Otherwise, Triton will not
+start.
+
+An example Application Configuration for Triton on Fleet Command:
+```yaml
+image:
+ serverArgs:
+ - --model-repository=s3://triton-inference-server-repository
+
+secret:
+ region:
+ id:
+ key:
+ token:
+```
+
+See [Fleet Command documentation](https://docs.nvidia.com/fleet-command/prod_fleet-command/prod_fleet-command/ug-deploying-to-the-edge.html)
+for more info.
+
+### Prometheus ServiceMonitor Support
+
+If you have `prometheus-operator` deployed, you can enable the ServiceMonitor
+for the Triton Inference Server by setting `serviceMonitor.enabled: true` in
+Application Configuration. This will also deploy a Grafana dashboard for Triton
+as a ConfigMap.
+
+Otherwise, metrics can be scraped by pointing an external Prometheus
+instance at the `metricsNodePort` in the values.
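+
+For example, enabling it in the Application Configuration mirrors the
+`serviceMonitor` block at the bottom of [values.yaml](values.yaml):
+
+```yaml
+serviceMonitor:
+  enabled: true
+```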
+
+## Using Triton Inference Server
+
+Now that the Triton Inference Server is running, you can send HTTP or GRPC
+requests to it to perform inferencing. By default, the service is exposed with a
+NodePort service type, where the same port is opened on all systems in a
+Location.
+
+Triton exposes an HTTP endpoint on port 30343, a GRPC endpoint on port 30344,
+and a Prometheus metrics endpoint on port 30345. These ports can be overridden
+in the application configuration when deploying. You can use curl to get the
+metadata of Triton from the HTTP endpoint. For example, if a system in your
+location has the IP `34.83.9.133`:
+
+```
+$ curl 34.83.9.133:30343/v2
+```
+
+Follow the [QuickStart](../../docs/getting_started/quickstart.md) to get the example image
+classification client, which can be used to perform inferencing with image
+classification models served by Triton. For example,
+
+```
+$ image_client -u 34.83.9.133:30343 -m densenet_onnx -s INCEPTION -c 3 mug.jpg
+Request 0, batch size 1
+Image '/workspace/images/mug.jpg':
+ 15.349568 (504) = COFFEE MUG
+ 13.227468 (968) = CUP
+ 10.424893 (505) = COFFEEPOT
+```
diff --git a/deploy/fleetcommand/dashboard.json b/deploy/fleetcommand/dashboard.json
new file mode 100644
index 0000000000..5868176cbe
--- /dev/null
+++ b/deploy/fleetcommand/dashboard.json
@@ -0,0 +1,419 @@
+{
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.3.5"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "heatmap",
+ "name": "Heatmap",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "nv_inference_request_success",
+ "legendFormat": "Success {{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "nv_inference_request_failure",
+ "legendFormat": "Failure {{instance}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Cumulative Inference Requests",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateReds",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 7,
+ "legend": {
+ "show": false
+ },
+ "options": {},
+ "reverseYBuckets": false,
+ "targets": [
+ {
+ "expr": "sum(increase(nv_inference_load_ratio_bucket[1m])) by (le)",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Load Ratio (Total Time / Compute Time)",
+ "tooltip": {
+ "show": true,
+ "showHistogram": false
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "auto",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 9
+ },
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(nv_inference_queue_duration_us[30s]) / 1000",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Queue Time (milliseconds)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Queue Time (ms)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "$datasource",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 9
+ },
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(nv_inference_compute_duration_us[30s]) / 1000",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Compute Time (milliseconds)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Compute Time (ms)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "5s",
+ "schemaVersion": 19,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "current": {
+ "text": "Prometheus",
+ "value": "Prometheus"
+ },
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "datasource",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "type": "datasource"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ]
+ },
+ "timezone": "",
+ "title": "Triton Inference Server",
+ "uid": "slEY4dsZk",
+ "version": 8
+}
diff --git a/deploy/fleetcommand/templates/_helpers.tpl b/deploy/fleetcommand/templates/_helpers.tpl
new file mode 100644
index 0000000000..6dba910012
--- /dev/null
+++ b/deploy/fleetcommand/templates/_helpers.tpl
@@ -0,0 +1,92 @@
+{{/*
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/}}
+
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Create inference server name.
+*/}}
+{{- define "triton-inference-server.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "triton-inference-server.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+ Create inference server metrics service name and fullname derived from above and
+ truncated appropriately.
+*/}}
+{{- define "triton-inference-server-metrics.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-metrics.fullname" -}}
+{{- $basename := include "triton-inference-server.fullname" . -}}
+{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics" -}}
+{{- end -}}
+
+{{/*
+ Create inference server metrics monitor name and fullname derived from
+ above and truncated appropriately.
+*/}}
+{{- define "triton-inference-server-metrics-monitor.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-metrics-monitor.fullname" -}}
+{{- $basename := include "triton-inference-server.fullname" . -}}
+{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "triton-inference-server.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/deploy/fleetcommand/templates/configmap-grafana-dashboard.yaml b/deploy/fleetcommand/templates/configmap-grafana-dashboard.yaml
new file mode 100644
index 0000000000..782b1f85e6
--- /dev/null
+++ b/deploy/fleetcommand/templates/configmap-grafana-dashboard.yaml
@@ -0,0 +1,37 @@
+# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+{{- if .Values.serviceMonitor.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ .Release.Name }}-dashboard-configmap
+ labels:
+ grafana_dashboard: "1"
+data:
+ dashboard.json: |-
+{{ .Files.Get "dashboard.json" | indent 4}}
+{{- end }}
diff --git a/deploy/fleetcommand/templates/deployment.yaml b/deploy/fleetcommand/templates/deployment.yaml
new file mode 100644
index 0000000000..5d7af7023d
--- /dev/null
+++ b/deploy/fleetcommand/templates/deployment.yaml
@@ -0,0 +1,112 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ template "triton-inference-server.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ replicas: {{ .Values.replicaCount }}
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+ template:
+ metadata:
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+
+ spec:
+ containers:
+ - name: {{ .Chart.Name }}
+ image: "{{ .Values.image.imageName }}"
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+
+ resources:
+ limits:
+ nvidia.com/gpu: {{ .Values.image.numGpus }}
+
+ args:
+ - {{ .Values.image.serverCommand }}
+ {{- $args := required "image.serverArgs, at least --model-repository, is required!" .Values.image.serverArgs }}
+ {{- range $args }}
+ - {{ . -}}
+ {{ end }}
+
+{{ if .Values.secret }}
+ env:
+ - name: AWS_DEFAULT_REGION
+ valueFrom:
+ secretKeyRef:
+ name: aws-credentials
+ key: AWS_DEFAULT_REGION
+ - name: AWS_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: aws-credentials
+ key: AWS_ACCESS_KEY_ID
+ - name: AWS_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: aws-credentials
+ key: AWS_SECRET_ACCESS_KEY
+{{- if .Values.secret.token }}
+ - name: AWS_SESSION_TOKEN
+ valueFrom:
+ secretKeyRef:
+ name: aws-credentials
+ key: AWS_SESSION_TOKEN
+{{- end }}
+{{- end }}
+
+ ports:
+ - containerPort: 8000
+ name: http
+ - containerPort: 8001
+ name: grpc
+ - containerPort: 8002
+ name: metrics
+ livenessProbe:
+ httpGet:
+ path: /v2/health/live
+ port: http
+ readinessProbe:
+ initialDelaySeconds: 5
+ periodSeconds: 5
+ httpGet:
+ path: /v2/health/ready
+ port: http
+
+ securityContext:
+ runAsUser: 1000
+ fsGroup: 1000
diff --git a/deploy/fleetcommand/templates/secrets.yaml b/deploy/fleetcommand/templates/secrets.yaml
new file mode 100644
index 0000000000..9c7dcd404d
--- /dev/null
+++ b/deploy/fleetcommand/templates/secrets.yaml
@@ -0,0 +1,40 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+{{- if .Values.secret }}
+apiVersion: v1
+kind: Secret
+metadata:
+ name: aws-credentials
+type: Opaque
+data:
+ AWS_DEFAULT_REGION: {{ .Values.secret.region }}
+ AWS_ACCESS_KEY_ID: {{ .Values.secret.id }}
+ AWS_SECRET_ACCESS_KEY: {{ .Values.secret.key }}
+{{- if .Values.secret.token }}
+ AWS_SESSION_TOKEN: {{ .Values.secret.token }}
+{{- end }}
+{{- end }}
diff --git a/deploy/fleetcommand/templates/service.yaml b/deploy/fleetcommand/templates/service.yaml
new file mode 100644
index 0000000000..4f12205902
--- /dev/null
+++ b/deploy/fleetcommand/templates/service.yaml
@@ -0,0 +1,102 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ type: {{ .Values.service.type }}
+ ports:
+ - port: 8000
+ targetPort: http
+ name: http-inference-server
+ {{- if .Values.service.httpNodePort }}
+ nodePort: {{ .Values.service.httpNodePort }}
+ {{- end }}
+ - port: 8001
+ targetPort: grpc
+ name: grpc-inference-server
+ {{- if .Values.service.grpcNodePort }}
+ nodePort: {{ .Values.service.grpcNodePort }}
+ {{- end }}
+ - port: 8002
+ targetPort: metrics
+ name: metrics-inference-server
+ {{- if .Values.service.metricsNodePort }}
+ nodePort: {{ .Values.service.metricsNodePort }}
+ {{- end }}
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server-metrics.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server-metrics.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ annotations:
+ alpha.monitoring.coreos.com/non-namespaced: "true"
+spec:
+ ports:
+ - name: metrics
+ port: 8080
+ targetPort: metrics
+ protocol: TCP
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+---
+{{- if .Values.serviceMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ name: {{ template "triton-inference-server-metrics-monitor.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server-metrics-monitor.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server-metrics.name" . }}
+ endpoints:
+ - port: metrics
+ interval: 15s
+{{- end }}
diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
new file mode 100644
index 0000000000..ca00a2acf1
--- /dev/null
+++ b/deploy/fleetcommand/values.yaml
@@ -0,0 +1,78 @@
+# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+replicaCount: 1
+
+image:
+ imageName: nvcr.io/nvidia/tritonserver:24.03-py3
+ pullPolicy: IfNotPresent
+ numGpus: 1
+ serverCommand: tritonserver
+ serverArgs:
+ # Model Repository Configuration (REQUIRED)
+ #
+ # Configure sources for model repository below. Multiple repositories
+ # can be specified
+ #
+ # To download models from an S3 bucket, uncomment and configure below
+ # To specify a non-AWS S3 endpoint, use the form
+ # s3://https://your-s3-endpoint:443/bucket/model_repository
+ #
+ #- --model-repository=s3://triton-inference-server-repository/model_repository
+ #
+ # Model Control Mode (Optional, default: none)
+ #
+ # To set model control mode, uncomment and configure below
+ # See https://github.com/triton-inference-server/server/blob/r24.03/docs/model_management.md
+ # for more details
+ #- --model-control-mode=explicit|poll|none
+ #
+ # Additional server args
+ #
+ # see https://github.com/triton-inference-server/server/blob/r24.03/README.md
+ # for more details
+
+service:
+ # for Fleet Command, type should be NodePort
+ type: NodePort
+ # the following ports will be the external port opened for each service
+ httpNodePort: 30343
+ grpcNodePort: 30344
+ metricsNodePort: 30345
+
+# AWS
+#secret:
+ # update the following with base64 encoded parameters
+# region: AWS_REGION
+# id: AWS_SECRET_KEY_ID
+# key: AWS_SECRET_ACCESS_KEY
+# token: AWS_SESSION_TOKEN
+
+# Prometheus-Operator ServiceMonitor support
+# change enabled to 'true' to enable a ServiceMonitor if your cluster has
+# Prometheus-Operator installed
+serviceMonitor:
+ enabled: false
diff --git a/deploy/gcp/Chart.yaml b/deploy/gcp/Chart.yaml
new file mode 100644
index 0000000000..2b7541bee6
--- /dev/null
+++ b/deploy/gcp/Chart.yaml
@@ -0,0 +1,31 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+appVersion: "1.0"
+description: Triton Inference Server
+name: triton-inference-server
+version: 1.0.0
diff --git a/deploy/gcp/README.md b/deploy/gcp/README.md
new file mode 100644
index 0000000000..dc80cc77de
--- /dev/null
+++ b/deploy/gcp/README.md
@@ -0,0 +1,300 @@
+
+
+[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
+
+# Kubernetes Deploy: Triton Inference Server Cluster
+
+A helm chart for installing a single cluster of Triton Inference
+Server is provided. By default the cluster contains a single instance
+of the inference server but the *replicaCount* configuration parameter
+can be set to create a cluster of any size, as described below.
+
+This guide assumes you already have a functional Kubernetes cluster
+and helm installed (see below for instructions on installing
+helm). Note the following requirements:
+
+* The helm chart deploys Prometheus and Grafana to collect and display Triton metrics. Your cluster must contain sufficient CPU resources to support these services. At a minimum you will likely require 2 CPU nodes with machine type of n1-standard-2 or greater.
+
+* If you want Triton Server to use GPUs for inferencing, your cluster
+must be configured to contain the desired number of GPU nodes with
+support for the NVIDIA driver and CUDA version required by the version
+of the inference server you are using.
+
+This helm chart is available from [Triton Inference Server
+GitHub](https://github.com/triton-inference-server/server) or from the
+[NVIDIA GPU Cloud (NGC)](https://ngc.nvidia.com).
+
+The steps below describe how to set up a model repository, use helm to
+launch the inference server, and then send inference requests to the
+running server. You can access a Grafana endpoint to see real-time
+metrics reported by the inference server.
+
+
+## Installing Helm
+
+### Helm v3
+
+If you do not already have Helm installed in your Kubernetes cluster,
+executing the following steps from the [official helm install
+guide](https://helm.sh/docs/intro/install/) will
+give you a quick setup.
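+
+Once installed, you can confirm the client version (this guide assumes a
+Helm v3 client):
+
+```
+$ helm version
+```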
+
+If you're currently using Helm v2 and would like to migrate to Helm v3,
+please see the [official migration guide](https://helm.sh/docs/topics/v2_v3_migration/).
+
+### Helm v2
+
+> **NOTE**: Moving forward this chart will only be tested and maintained for Helm v3.
+
+Below are example instructions for installing Helm v2.
+
+```
+$ curl https://raw.githubusercontent.com/helm/helm/master/scripts/get | bash
+$ kubectl create serviceaccount -n kube-system tiller
+serviceaccount/tiller created
+$ kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
+$ helm init --service-account tiller --wait
+```
+
+If you run into any issues, you can refer to the official installation guide [here](https://v2.helm.sh/docs/install/).
+
+## Model Repository
+
+If you already have a model repository you may use that with this helm
+chart. If you do not have a model repository, you can check out a local
+copy of the inference server source repository to create an example
+model repository:
+
+```
+$ git clone https://github.com/triton-inference-server/server.git
+```
+
+Triton Server needs a repository of models that it will make available
+for inferencing. For this example you will place the model repository
+in a Google Cloud Storage bucket.
+
+```
+$ gsutil mb gs://triton-inference-server-repository
+```
+
+Following the [QuickStart](../../docs/getting_started/quickstart.md), download the
+example model repository to your system and copy it into the GCS
+bucket.
+
+```
+$ gsutil cp -r docs/examples/model_repository gs://triton-inference-server-repository/model_repository
+```
+
+### GCS Permissions
+
+Make sure the bucket permissions are set so that the inference server
+can access the model repository. If the bucket is public then no
+additional changes are needed and you can proceed to the "Deploy
+Prometheus and Grafana" section.
+
+If bucket permissions need to be set with the
+GOOGLE_APPLICATION_CREDENTIALS environment variable then perform the
+following steps:
+
+* Generate Google service account JSON with proper permissions called
+ *gcp-creds.json*.
+
+* Create a Kubernetes secret from *gcp-creds.json*:
+
+```
+ $ kubectl create configmap gcpcreds --from-literal "project-id=myproject"
+ $ kubectl create secret generic gcpcreds --from-file gcp-creds.json
+```
+
+* Modify templates/deployment.yaml to include the
+ GOOGLE_APPLICATION_CREDENTIALS environment variable:
+
+```
+ env:
+ - name: GOOGLE_APPLICATION_CREDENTIALS
+ value: /secret/gcp-creds.json
+```
+
+* Modify templates/deployment.yaml to mount the secret in a volume at
+ /secret:
+
+```
+ volumeMounts:
+ - name: vsecret
+ mountPath: "/secret"
+ readOnly: true
+ ...
+ volumes:
+ - name: vsecret
+ secret:
+ secretName: gcpcreds
+```
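+
+As an optional sanity check (illustrative only), confirm that the configmap
+and secret created above exist before deploying the chart:
+
+```
+$ kubectl get configmap gcpcreds
+$ kubectl get secret gcpcreds
+```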
+
+
+## Deploy Prometheus and Grafana
+
+The inference server metrics are collected by Prometheus and viewable
+by Grafana. The inference server helm chart assumes that Prometheus
+and Grafana are available so this step must be followed even if you
+don't want to use Grafana.
+
+Use the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) to install these components. The
+*serviceMonitorSelectorNilUsesHelmValues* flag is needed so that
+Prometheus can find the inference server metrics in the *example*
+release deployed below.
+
+```
+$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false prometheus-community/kube-prometheus-stack
+```
+
+Then port-forward to the Grafana service so you can access it from
+your local browser.
+
+```
+$ kubectl port-forward service/example-metrics-grafana 8080:80
+```
+
+Now you should be able to navigate in your browser to localhost:8080
+and see the Grafana login page. Use username=admin and
+password=prom-operator to login.
+
+An example Grafana dashboard is available in dashboard.json. Use the
+import function in Grafana to import and view this dashboard.
+
+## Deploy the Inference Server
+
+Deploy the inference server using the default configuration with the
+following commands.
+
+```
+$ cd <directory containing Chart.yaml>
+$ helm install example .
+```
+
+Use kubectl to see status and wait until the inference server pods are
+running.
+
+```
+$ kubectl get pods
+NAME READY STATUS RESTARTS AGE
+example-triton-inference-server-5f74b55885-n6lt7 1/1 Running 0 2m21s
+```
+
+There are several ways of overriding the default configuration as
+described in this [helm
+documentation](https://helm.sh/docs/using_helm/#customizing-the-chart-before-installing).
+
+You can edit the values.yaml file directly or you can use the *--set*
+option to override a single parameter with the CLI. For example, to
+deploy a cluster of four inference servers use *--set* to set the
+replicaCount parameter.
+
+```
+$ helm install example --set replicaCount=4 .
+```
+
+You can also write your own "config.yaml" file with the values you
+want to override and pass it to helm.
+
+```
+$ cat << EOF > config.yaml
+namespace: MyCustomNamespace
+image:
+ imageName: nvcr.io/nvidia/tritonserver:custom-tag
+ modelRepositoryPath: gs://my_model_repository
+EOF
+$ helm install example -f config.yaml .
+```
+
+## Using Triton Inference Server
+
+Now that the inference server is running you can send HTTP or GRPC
+requests to it to perform inferencing. By default, the inferencing
+service is exposed with a LoadBalancer service type. Use the following
+to find the external IP for the inference server. In this case it is
+34.83.9.133.
+
+```
+$ kubectl get services
+NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+...
+example-triton-inference-server LoadBalancer 10.18.13.28 34.83.9.133 8000:30249/TCP,8001:30068/TCP,8002:32723/TCP 47m
+```
+
+The inference server exposes an HTTP endpoint on port 8000, a GRPC
+endpoint on port 8001, and a Prometheus metrics endpoint on
+port 8002. You can use curl to get the metadata of the inference server
+from the HTTP endpoint.
+
+```
+$ curl 34.83.9.133:8000/v2
+```
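+
+You can also verify that the server is live and ready using the health
+endpoints exposed on the same HTTP port (shown here against the example
+external IP; substitute your own):
+
+```
+$ curl -v 34.83.9.133:8000/v2/health/ready
+```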
+
+Follow the [QuickStart](../../docs/getting_started/quickstart.md) to get the example
+image classification client that can be used to perform inferencing
+using image classification models being served by the inference
+server. For example,
+
+```
+$ image_client -u 34.83.9.133:8000 -m inception_graphdef -s INCEPTION -c3 mug.jpg
+Request 0, batch size 1
+Image 'images/mug.jpg':
+ 504 (COFFEE MUG) = 0.723992
+ 968 (CUP) = 0.270953
+ 967 (ESPRESSO) = 0.00115997
+```
+
+## Cleanup
+
+Once you've finished using the inference server you should use helm to
+delete the deployment.
+
+```
+$ helm list
+NAME REVISION UPDATED STATUS CHART APP VERSION NAMESPACE
+example 1 Wed Feb 27 22:16:55 2019 DEPLOYED triton-inference-server-1.0.0 1.0 default
+example-metrics 1 Tue Jan 21 12:24:07 2020 DEPLOYED prometheus-operator-6.18.0 0.32.0 default
+
+$ helm uninstall example
+$ helm uninstall example-metrics
+```
+
+For the Prometheus and Grafana services, you should [explicitly delete
+CRDs](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack#uninstall-helm-chart):
+
+```
+$ kubectl delete crd alertmanagerconfigs.monitoring.coreos.com alertmanagers.monitoring.coreos.com podmonitors.monitoring.coreos.com probes.monitoring.coreos.com prometheuses.monitoring.coreos.com prometheusrules.monitoring.coreos.com servicemonitors.monitoring.coreos.com thanosrulers.monitoring.coreos.com
+```
+
+You may also want to delete the GCS bucket you created to hold the
+model repository.
+
+```
+$ gsutil rm -r gs://triton-inference-server-repository
+```
diff --git a/deploy/gcp/dashboard.json b/deploy/gcp/dashboard.json
new file mode 100644
index 0000000000..8960b41d35
--- /dev/null
+++ b/deploy/gcp/dashboard.json
@@ -0,0 +1,411 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.3.5"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "heatmap",
+ "name": "Heatmap",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "nv_inference_request_success",
+ "legendFormat": "Success {{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "nv_inference_request_failure",
+ "legendFormat": "Failure {{instance}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Cumulative Inference Requests",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateReds",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 7,
+ "legend": {
+ "show": false
+ },
+ "options": {},
+ "reverseYBuckets": false,
+ "targets": [
+ {
+ "expr": "sum(increase(nv_inference_load_ratio_bucket[1m])) by (le)",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Load Ratio (Total Time / Compute Time)",
+ "tooltip": {
+ "show": true,
+ "showHistogram": false
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "auto",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 9
+ },
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(nv_inference_queue_duration_us[30s]) / 1000",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Queue Time (milliseconds)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Queue Time (ms)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 9
+ },
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(nv_inference_compute_duration_us[30s]) / 1000",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Compute Time (milliseconds)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Compute Time (ms)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "5s",
+ "schemaVersion": 19,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ]
+ },
+ "timezone": "",
+ "title": "Triton Inference Server",
+ "uid": "slEY4dsZk",
+ "version": 8
+}
diff --git a/deploy/gcp/templates/_helpers.tpl b/deploy/gcp/templates/_helpers.tpl
new file mode 100644
index 0000000000..6dba910012
--- /dev/null
+++ b/deploy/gcp/templates/_helpers.tpl
@@ -0,0 +1,92 @@
+{{/*
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/}}
+
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Create inference server name.
+*/}}
+{{- define "triton-inference-server.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "triton-inference-server.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+ Create inference server metrics service name and fullname derived from above and
+ truncated appropriately.
+*/}}
+{{- define "triton-inference-server-metrics.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-metrics.fullname" -}}
+{{- $basename := include "triton-inference-server.fullname" . -}}
+{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics" -}}
+{{- end -}}
+
+{{/*
+ Create inference server metrics monitor name and fullname derived from
+ above and truncated appropriately.
+*/}}
+{{- define "triton-inference-server-metrics-monitor.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-metrics-monitor.fullname" -}}
+{{- $basename := include "triton-inference-server.fullname" . -}}
+{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "triton-inference-server.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/deploy/gcp/templates/deployment.yaml b/deploy/gcp/templates/deployment.yaml
new file mode 100644
index 0000000000..b7592c7043
--- /dev/null
+++ b/deploy/gcp/templates/deployment.yaml
@@ -0,0 +1,81 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ template "triton-inference-server.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ replicas: {{ .Values.replicaCount }}
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+ template:
+ metadata:
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+
+ spec:
+ containers:
+ - name: {{ .Chart.Name }}
+ image: "{{ .Values.image.imageName }}"
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+
+ resources:
+ limits:
+ nvidia.com/gpu: {{ .Values.image.numGpus }}
+
+ args: ["tritonserver", "--model-store={{ .Values.image.modelRepositoryPath }}"]
+
+ ports:
+ - containerPort: 8000
+ name: http
+ - containerPort: 8001
+ name: grpc
+ - containerPort: 8002
+ name: metrics
+ livenessProbe:
+ httpGet:
+ path: /v2/health/live
+ port: http
+ readinessProbe:
+ initialDelaySeconds: 5
+ periodSeconds: 5
+ httpGet:
+ path: /v2/health/ready
+ port: http
+
+ securityContext:
+ runAsUser: 1000
+ fsGroup: 1000
diff --git a/deploy/gcp/templates/service.yaml b/deploy/gcp/templates/service.yaml
new file mode 100644
index 0000000000..3315fd77db
--- /dev/null
+++ b/deploy/gcp/templates/service.yaml
@@ -0,0 +1,91 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ type: {{ .Values.service.type }}
+ ports:
+ - port: 8000
+ targetPort: http
+ name: http-inference-server
+ - port: 8001
+ targetPort: grpc
+ name: grpc-inference-server
+ - port: 8002
+ targetPort: metrics
+ name: metrics-inference-server
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server-metrics.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server-metrics.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ annotations:
+ alpha.monitoring.coreos.com/non-namespaced: "true"
+spec:
+ ports:
+ - name: metrics
+ port: 8080
+ targetPort: metrics
+ protocol: TCP
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ name: {{ template "triton-inference-server-metrics-monitor.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server-metrics-monitor.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server-metrics.name" . }}
+ endpoints:
+ - port: metrics
+ interval: 15s
diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
new file mode 100644
index 0000000000..0173f37b6f
--- /dev/null
+++ b/deploy/gcp/values.yaml
@@ -0,0 +1,36 @@
+# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+replicaCount: 1
+
+image:
+ imageName: nvcr.io/nvidia/tritonserver:24.03-py3
+ pullPolicy: IfNotPresent
+ modelRepositoryPath: gs://triton-inference-server-repository/model_repository
+ numGpus: 1
+
+service:
+ type: LoadBalancer
\ No newline at end of file
diff --git a/deploy/gke-marketplace-app/README.md b/deploy/gke-marketplace-app/README.md
new file mode 100644
index 0000000000..e99b9efbae
--- /dev/null
+++ b/deploy/gke-marketplace-app/README.md
@@ -0,0 +1,201 @@
+
+
+# NVIDIA Triton Inference Server GKE Marketplace Application
+
+**Table Of Contents**
+- [NVIDIA Triton Inference Server GKE Marketplace Application](#nvidia-triton-inference-server-gke-marketplace-application)
+ - [Description](#description)
+ - [Prerequisites](#prerequisites)
+ - [Demo Instruction](#demo-instruction)
+ - [Additional Resources](#additional-resources)
+ - [Known Issues](#known-issues)
+
+## Description
+
+This repository contains the Google Kubernetes Engine (GKE) Marketplace Application for the NVIDIA Triton Inference Server deployer.
+
+ - Triton GKE deployer is a helm chart deployer recommended by GKE Marketplace
+ - Triton GKE deployer deploys a GKE ingress which accepts public inference requests
+ - Triton GKE deployer includes a horizontal pod autoscaler (HPA) which relies on the [Stackdriver custom metrics adapter](https://github.com/GoogleCloudPlatform/k8s-stackdriver/tree/master/custom-metrics-stackdriver-adapter) to monitor GPU duty cycle and automatically scale GPU nodes.
+ - This repo also contains a sample that generates a BERT model with TensorRT and uses Locust to experiment with GPU node autoscaling and to monitor client latency/throughput.
+
+![Cloud Architecture Diagram](diagram.png)
+
+## Prerequisites
+
+ - [Install the Google Cloud SDK on your laptop/client workstation](https://cloud.google.com/sdk/docs/install), so that the `gcloud` CLI can be run on the client, and sign in with your GCP credentials.
+ - Alternatively, you can use [Google Cloud Shell](https://cloud.google.com/shell/docs/launching-cloud-shell).
+
+## Demo Instruction
+
+First, install this Triton GKE app into an existing GKE cluster with a GPU node pool; Google Cloud Marketplace currently doesn't support automatic creation of GPU clusters. Users have to run the following commands to create a compatible cluster (GKE version >= 1.18.7) with GPU node pools. We recommend selecting T4 or A100 (MIG) instance types and choosing the CPU ratio based on profiling of the actual inference workload.
+
+Users need to follow these [instructions](https://cloud.google.com/kubernetes-engine/docs/how-to/kubernetes-service-accounts#creating_a_kubernetes_service_account) to create a Kubernetes service account. In this example, we use `gke-test@k80-exploration.iam.gserviceaccount.com`. Make sure it has access to Artifact Registry and the Monitoring Viewer role. For example, to grant access to the custom metrics required for the HPA to work:
+```
+gcloud iam service-accounts add-iam-policy-binding --role \
+ roles/iam.workloadIdentityUser --member \
+ "serviceAccount:.svc.id.goog[custom-metrics/custom-metrics-stackdriver-adapter]" \
+ @.iam.gserviceaccount.com
+
+kubectl annotate serviceaccount --namespace custom-metrics \
+ custom-metrics-stackdriver-adapter \
+  iam.gke.io/gcp-service-account=<google-service-account>@<project-id>.iam.gserviceaccount.com
+```
+
+Currently, GKE >= 1.18.7 is only supported in the GKE rapid channel; to find the latest version, please visit the [GKE release notes](https://cloud.google.com/kubernetes-engine/docs/release-notes).
+```
+export PROJECT_ID=
+export ZONE=
+export REGION=
+export DEPLOYMENT_NAME=
+# example: export SERVICE_ACCOUNT="gke-test@k80-exploration.iam.gserviceaccount.com"
+export SERVICE_ACCOUNT=
+
+gcloud beta container clusters create ${DEPLOYMENT_NAME} \
+--addons=HorizontalPodAutoscaling,HttpLoadBalancing \
+--service-account=${SERVICE_ACCOUNT} \
+--machine-type=n1-standard-8 \
+--node-locations=${ZONE} \
+--monitoring=SYSTEM \
+--zone=${ZONE} \
+--subnetwork=default \
+--scopes cloud-platform \
+--num-nodes 1 \
+--project ${PROJECT_ID}
+
+# add GPU node pools, user can modify number of node based on workloads
+gcloud container node-pools create accel \
+ --project ${PROJECT_ID} \
+ --zone ${ZONE} \
+ --cluster ${DEPLOYMENT_NAME} \
+ --service-account=${SERVICE_ACCOUNT} \
+ --num-nodes 2 \
+ --accelerator type=nvidia-tesla-t4,count=1 \
+ --enable-autoscaling --min-nodes 2 --max-nodes 3 \
+ --machine-type n1-standard-4 \
+ --disk-size=100 \
+ --scopes cloud-platform \
+ --verbosity error
+
+# so that you can run kubectl locally to the cluster
+gcloud container clusters get-credentials ${DEPLOYMENT_NAME} --project ${PROJECT_ID} --zone ${ZONE}
+
+# deploy NVIDIA device plugin for GKE to prepare GPU nodes for driver install
+kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/cos/daemonset-preloaded-latest.yaml
+
+# make sure you can run kubectl locally to access the cluster
+kubectl create clusterrolebinding cluster-admin-binding --clusterrole cluster-admin --user "$(gcloud config get-value account)"
+
+# enable stackdriver custom metrics adaptor
+kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/k8s-stackdriver/master/custom-metrics-stackdriver-adapter/deploy/production/adapter_new_resource_model.yaml
+
+# create an ip for ingress traffic
+gcloud compute addresses create ingress-triton --global
+```
+
+Creating a cluster and adding GPU nodes could take up to 10 minutes. Please be patient after executing these commands. GPU resources in GCP can be fully utilized, so please try a different zone in case compute resources cannot be allocated. After the GKE cluster is running, run `kubectl get pods --all-namespaces` to make sure the client can access the cluster correctly.
+
+If you would like to experiment with A100 MIG-partitioned GPUs in GKE, please create the node pool with the following command:
+```
+gcloud beta container node-pools create accel \
+ --project ${PROJECT_ID} \
+ --zone ${ZONE} \
+ --cluster ${DEPLOYMENT_NAME} \
+ --service-account=${SERVICE_ACCOUNT} \
+ --num-nodes 1 \
+ --accelerator type=nvidia-tesla-a100,count=1,gpu-partition-size=1g.5gb \
+ --enable-autoscaling --min-nodes 1 --max-nodes 2 \
+ --machine-type=a2-highgpu-1g \
+ --disk-size=100 \
+ --scopes cloud-platform \
+ --verbosity error
+```
+
+Please note that A100 MIG in GKE does not support GPU metrics yet, and Triton GPU metrics are not compatible with A100 MIG. Hence, please disable GPU metrics by unselecting allowGPUMetrics when deploying the Triton GKE app. For the same reason, this deployer does not support inference workload auto-scaling on A100 MIG either.
+
+Second, go to this [GKE Marketplace link](https://console.cloud.google.com/marketplace/details/nvidia-ngc-public/triton-inference-server) to deploy Triton application.
+
+Users can leave everything as default if their models have already been tested/validated with Triton. They can provide a GCS path pointing to the model repository containing their models. By default, we provide a BERT large model optimized by TensorRT in a public demo GCS bucket that is compatible with the `xx.yy` release of Triton Server in `gs://triton_sample_models/xx_yy`. However, please take note of the following about this demo bucket:
+- The TensorRT engine provided in the demo bucket is only compatible with Tesla T4 GPUs.
+- This bucket is located in `us-central1`, so loading from this bucket into Triton in other regions may be affected.
+- The first deployment of this Triton GKE application will be slower than consecutive runs because the image needs to be pulled into the GKE cluster.
+- You can find an example of how this model is generated and uploaded [here](trt-engine/README.md).
+
+Here, `xx.yy` is the version of the NGC Triton container needed.
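+
+For example (illustrative only), you can list the demo repository for a
+given release before pointing the deployer at it, substituting the release
+version for `xx_yy`:
+
+```
+gsutil ls gs://triton_sample_models/xx_yy/
+```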
+
+![GKE Marketplace Application UI](ui.png)
+
+We want to discuss the HPA autoscaling metrics users can leverage. GPU power (percentage of power) tends to be a reliable metric, especially for larger GPUs like V100 and A100. GKE currently natively supports GPU duty cycle, which corresponds to GPU utilization in `nvidia-smi`. We ask users to always profile their model to determine the autoscaling target and metrics. When selecting the right metrics for autoscaling, the goal should be to: 1) meet the SLA requirement, 2) give consideration to transient request load, and 3) keep the GPU as fully utilized as possible. Profiling helps in two ways. First, if users decide to use duty cycle or another GPU metric, it is recommended to establish a baseline linking SLA requirements such as latency with the GPU metric; for example, for model A, latency stays below 10ms 99% of the time when duty cycle is below 80%. Second, profiling also provides insight for model optimization for inference, with tools like [Nsight](https://developer.nvidia.com/nsight-systems).
+
+Once the application is deployed successfully, get the public ip from ingress:
+```
+> kubectl get ingress
+NAME CLASS HOSTS ADDRESS PORTS AGE
+triton-external * 35.186.215.182 80 107s
+```
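+
+As a quick smoke test (illustrative; substitute your own ingress address),
+you can check that the ingress reaches a live and ready Triton server:
+
+```
+> curl http://35.186.215.182/v2/health/ready
+```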
+
+Third, we will try sending requests to the server with the provided client examples.
+
+If you selected to deploy Triton to accept HTTP requests, please launch [Locust](https://docs.locust.io/en/stable/installation.html) with the ingress host and port to query Triton Inference Server. In this [example script](https://github.com/triton-inference-server/server/tree/master/deploy/gke-marketplace-app/client-sample/locustfile_bert.py), we send requests to a Triton server that has loaded, from a GCP bucket, a BERT large TensorRT engine with a sequence length of 128. We simulate 1000 concurrent users as the target and spawn users at a rate of 50 users per second.
+```
+locust -f locustfile_bert.py -H http://${INGRESS_HOST}:${INGRESS_PORT}
+```
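+
+If Locust is not already installed on the client machine, it can typically
+be installed with pip, per the installation guide linked above:
+
+```
+pip install locust
+```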
+
+The client example pushes about ~650 QPS (queries per second) to the Triton server and will trigger auto-scaling of T4 GPU nodes (we recommend using T4 and A100 [MIG] for inference). From the Locust UI, we will observe a drop in the mean and variance of request latency. In the end, after autoscaling, we see the latency stabilize at ~200 ms end to end, from a US client to a Europe server, which is excellent for a model that has 345 million parameters. Since each node uses one T4 plus an n1-standard-4 instance and can handle ~450 QPS, at on-demand prices it costs ($0.35+$0.19)=$0.54/hr, which translates to 3 million inferences per dollar for the BERT large model at batch size 1. Furthermore, with the 3-year commitment price, the hourly rate is ($0.16+$0.08)=$0.24/hr, which translates to 6.75 million inferences per dollar.
+
+![Locust Client Chart](client.png)
+
+Alternatively, users can opt to use
+[Perf Analyzer](https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/README.md)
+to profile and study the performance of Triton Inference Server. Here we also
+provide a
+[client script](https://github.com/triton-inference-server/server/tree/master/deploy/gke-marketplace-app/client-sample/perf_analyzer_grpc.sh)
+that uses Perf Analyzer to send gRPC requests to the Triton Server GKE
+deployment. The Perf Analyzer client requires the NGC Triton Client container.
+
+```
+bash perf_analyzer_grpc.sh ${INGRESS_HOST}:${INGRESS_PORT}
+```
+
+## Additional Resources
+
+See the following resources to learn more about NVIDIA Triton Inference Server and GKE GPU capabilities.
+
+**Documentation**
+
+- [GPU in Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/docs/how-to/gpus)
+- [Optimize GPU Performance in Google Cloud Platform](https://cloud.google.com/compute/docs/gpus/optimize-gpus)
+- [Triton Inference Server](https://github.com/triton-inference-server/server)
+- [AI Platform Prediction: Custom container concepts with Triton Server](https://cloud.google.com/solutions/ai-platform-prediction-custom-container-concepts) by [Kevin Tsai](https://github.com/merlin1649)
+- [AI Platform Prediction: Direct model server setup for NVIDIA Triton Inference Server](https://cloud.google.com/solutions/ai-platform-prediction-direct-model-server-nvidia) by [Kevin Tsai](https://github.com/merlin1649)
+
+## Known Issues
+
+- GKE one-click cluster creation doesn't support GPU node pools at the moment; users have to manually create a compatible (>=1.18.7) cluster and attach a node pool (T4 and A100 MIG recommended).
+- When the Horizontal Pod Autoscaler (HPA) scales out and all GPU node pools are already fully utilized, GKE will request a new GPU node, which can take 4-7 minutes; this can be a long wait on top of GPU driver installation and image pulling. We recommend leveraging multi-tier model serving and Triton's priority feature to create a cushion for latency-critical models, and allocating an active standby GPU node for request spikes.
diff --git a/deploy/gke-marketplace-app/benchmark/README.md b/deploy/gke-marketplace-app/benchmark/README.md
new file mode 100644
index 0000000000..5138148035
--- /dev/null
+++ b/deploy/gke-marketplace-app/benchmark/README.md
@@ -0,0 +1,95 @@
+
+
+# Benchmarking with NVIDIA Triton Inference Server GKE Marketplace Application
+
+**Table Of Contents**
+- [Models](#models)
+- [Performance](#performance)
+
+## Models
+
+First, we collect a set of TensorFlow and TensorRT models to compare:
+
+- Get [Distill Bert fine-tuned with Squad Q&A task](https://huggingface.co/distilbert-base-cased-distilled-squad/tree/main) from Huggingface. `wget https://huggingface.co/distilbert-base-cased-distilled-squad/blob/main/saved_model.tar.gz`
+- Get [Bert base fine-tuned with Squad Q&A task](https://huggingface.co/deepset/bert-base-cased-squad2/tree/main) from Huggingface `wget https://huggingface.co/deepset/bert-base-cased-squad2/blob/main/saved_model.tar.gz`
+- Follow [TensorRT Demo BERT](https://github.com/NVIDIA/TensorRT/tree/master/demo/BERT) to convert the BERT base model to a TensorRT engine, choosing a sequence length of 384 to match the previous two TensorFlow models. As the last step, we create the TensorRT engine with two optimization profiles, profile 0 for batch size 1 and profile 1 for batch size 4, by running: `python3 builder.py -m models/fine-tuned/bert_tf_ckpt_base_qa_squad2_amp_384_v19.03.1/model.ckpt -o engines/model.plan -b 8 -s 384 --fp16 --int8 --strict -c models/fine-tuned/bert_tf_ckpt_base_qa_squad2_amp_384_v19.03.1 --squad-json ./squad/train-v2.0.json -v models/fine-tuned/bert_tf_ckpt_base_qa_squad2_amp_384_v19.03.1/vocab.txt --calib-num 100 -iln -imh`. This needs to be run on the respective inference GPU (an engine optimized on A100 cannot be used for inference on T4).
+
+We then place the models into a GCS bucket with the following structure; the `config.pbtxt` files are provided.
+```
+ ├── bert_base_trt_gpu
+ │ ├── 1
+ │ │ └── model.plan
+ │ └── config.pbtxt
+ ├── bert_base_trt_gpu_seqlen128
+ │ ├── 1
+ │ │ └── model.plan
+ │ └── config.pbtxt
+ ├── bert_base_tf_gpu
+ │ ├── 1
+ │ │ └── model.savedmodel
+ │ └── config.pbtxt
+ ├── bert_base_tf_cpu
+ │ ├── 1
+ │ │ └── model.savedmodel
+ │ └── config.pbtxt
+ ├── bert_distill_tf_gpu
+ │ ├── 1
+ │ │ └── model.savedmodel
+ │ └── config.pbtxt
+ └── bert_distill_tf_cpu
+ ├── 1
+ │ └── model.savedmodel
+ └── config.pbtxt
+```
+
+When deploying the Triton GKE application, point the model repository to the directory that contains the structure above with the actual models.
+
+## Performance
+
+We use Triton's Perf Analyzer to benchmark the performance of each model; the Perf Analyzer resides in another pod of the GKE cluster.
+```bash
+export INGRESS_HOST=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+export INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].port}')
+bash perf_query.sh ${INGRESS_HOST}:${INGRESS_PORT} bert_base_trt_gpu 384
+```
+
+We deploy the models on n1-standard-96 for CPU BERT BASE and Distill BERT and on (n1-standard-4 + T4) for the GPU BERT models. The sequence length of the BERT models is 384 tokens, and we measure latency/throughput with a concurrency sweep using Triton's Perf Analyzer. The latency includes Istio ingress/load balancing and reflects the true round-trip cost within the same GCP zone.
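+
+A concurrency sweep of this kind can also be run directly with Perf Analyzer;
+the command below is an illustrative sketch (model name, endpoint, and sweep
+range are placeholders, and models with dynamic input shapes may additionally
+need `--shape` or `--input-data` arguments):
+
+```bash
+perf_analyzer -m bert_base_trt_gpu \
+    -u ${INGRESS_HOST}:${INGRESS_PORT} \
+    --concurrency-range 1:16:4 \
+    --percentile=95
+```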
+
+For all models with a sequence length of 384:
+
+| Model                  | Latency | Throughput |
+| ---------------------- | ------- | ---------- |
+| CPU BERT BASE          | 700 ms  | 12 QPS     |
+| CPU Distill BERT       | 369 ms  | 24 QPS     |
+| GPU BERT BASE          | 230 ms  | 34.7 QPS   |
+| GPU Distill BERT       | 118 ms  | 73.3 QPS   |
+| GPU TensorRT BERT BASE | 50 ms   | 465 QPS    |
+
+The n1-standard-96 is priced at $4.56/hr, while the n1-standard-4 at $0.19/hr plus a T4 at $0.35/hr totals $0.54/hr. While achieving much lower latency, BERT inference with TensorRT on T4 therefore delivers over 163 times more inferences per dollar than Distill BERT inference on n1-standard-96.
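+
+As a rough check of that figure, inferences per dollar can be computed as
+throughput x 3600 / hourly price:
+
+```
+TensorRT BERT BASE on T4: 465 * 3600 / 0.54 ~= 3,100,000 inferences per dollar
+Distill BERT on CPU:       24 * 3600 / 4.56 ~=    18,900 inferences per dollar
+ratio: 3,100,000 / 18,900 ~= 163
+```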
+
+
+
diff --git a/deploy/gke-marketplace-app/benchmark/model-store/bert_base_tf_cpu/config.pbtxt b/deploy/gke-marketplace-app/benchmark/model-store/bert_base_tf_cpu/config.pbtxt
new file mode 100644
index 0000000000..3bfccb5c45
--- /dev/null
+++ b/deploy/gke-marketplace-app/benchmark/model-store/bert_base_tf_cpu/config.pbtxt
@@ -0,0 +1,35 @@
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+max_batch_size: 4
+dynamic_batching {
+ preferred_batch_size: 1
+ max_queue_delay_microseconds: 2000000
+}
+instance_group {
+ count: 2
+ kind: KIND_CPU
+}
diff --git a/deploy/gke-marketplace-app/benchmark/model-store/bert_base_tf_gpu/config.pbtxt b/deploy/gke-marketplace-app/benchmark/model-store/bert_base_tf_gpu/config.pbtxt
new file mode 100644
index 0000000000..b6ca32f9a2
--- /dev/null
+++ b/deploy/gke-marketplace-app/benchmark/model-store/bert_base_tf_gpu/config.pbtxt
@@ -0,0 +1,35 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+max_batch_size: 4
+dynamic_batching {
+ preferred_batch_size: 4
+ max_queue_delay_microseconds: 200000
+}
+instance_group {
+ count: 2
+ kind: KIND_GPU
+}
diff --git a/deploy/gke-marketplace-app/benchmark/model-store/bert_base_trt_gpu/config.pbtxt b/deploy/gke-marketplace-app/benchmark/model-store/bert_base_trt_gpu/config.pbtxt
new file mode 100644
index 0000000000..acbd124bf2
--- /dev/null
+++ b/deploy/gke-marketplace-app/benchmark/model-store/bert_base_trt_gpu/config.pbtxt
@@ -0,0 +1,38 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+platform: "tensorrt_plan"
+max_batch_size: 4
+dynamic_batching {
+ preferred_batch_size: 4
+ max_queue_delay_microseconds: 200000
+}
+instance_group {
+ count: 2
+ profile: "1"
+ kind: KIND_GPU
+}
+
diff --git a/deploy/gke-marketplace-app/benchmark/model-store/bert_base_trt_gpu_seqlen128/config.pbtxt b/deploy/gke-marketplace-app/benchmark/model-store/bert_base_trt_gpu_seqlen128/config.pbtxt
new file mode 100644
index 0000000000..2ee39e7dbc
--- /dev/null
+++ b/deploy/gke-marketplace-app/benchmark/model-store/bert_base_trt_gpu_seqlen128/config.pbtxt
@@ -0,0 +1,37 @@
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+platform: "tensorrt_plan"
+max_batch_size: 8
+dynamic_batching {
+ preferred_batch_size: 8
+ max_queue_delay_microseconds: 200000
+}
+instance_group {
+ count: 2
+ kind: KIND_GPU
+}
+
diff --git a/deploy/gke-marketplace-app/benchmark/model-store/bert_distill_tf_cpu/config.pbtxt b/deploy/gke-marketplace-app/benchmark/model-store/bert_distill_tf_cpu/config.pbtxt
new file mode 100644
index 0000000000..c8e8074309
--- /dev/null
+++ b/deploy/gke-marketplace-app/benchmark/model-store/bert_distill_tf_cpu/config.pbtxt
@@ -0,0 +1,35 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+max_batch_size: 4
+dynamic_batching {
+ preferred_batch_size: 1
+ max_queue_delay_microseconds: 2000000
+}
+instance_group {
+ count: 2
+ kind: KIND_CPU
+}
diff --git a/deploy/gke-marketplace-app/benchmark/model-store/bert_distill_tf_gpu/config.pbtxt b/deploy/gke-marketplace-app/benchmark/model-store/bert_distill_tf_gpu/config.pbtxt
new file mode 100644
index 0000000000..b6ca32f9a2
--- /dev/null
+++ b/deploy/gke-marketplace-app/benchmark/model-store/bert_distill_tf_gpu/config.pbtxt
@@ -0,0 +1,35 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+max_batch_size: 4
+dynamic_batching {
+ preferred_batch_size: 4
+ max_queue_delay_microseconds: 200000
+}
+instance_group {
+ count: 2
+ kind: KIND_GPU
+}
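Once Triton has loaded this model, the effective batching settings can be read back over the HTTP API. A minimal check, assuming the server is reachable at the ingress address used elsewhere in this deployment (exported here as `INGRESS_HOST`, an assumption):

```bash
# The returned JSON should echo the dynamic_batching and instance_group
# values from the config.pbtxt above.
curl -s "http://${INGRESS_HOST}/v2/models/bert_distill_tf_gpu/config" | python3 -m json.tool
```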
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/perf_query.sh b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/perf_query.sh
new file mode 100755
index 0000000000..0ce6e120b7
--- /dev/null
+++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/perf_query.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+SERVER_HOST=${1:-"${INGRESS_HOST}:${INGRESS_PORT}"} # update with the public ingress IP:port if not exported in the environment
+MODEL_NAME=${2:-"${MODEL_NAME}"}
+SEQ_LENGTH=${3:-"${SEQ_LEN}"}
+BATCH_SIZE=${4:-2}
+MAX_LATENCY=${5:-5000}
+MAX_CLIENT_THREADS=${6:-20}
+MAX_CONCURRENCY=${7:-24}
+MODEL_VERSION=${8:-1}
+precision=${9:-"fp32"}
+PERFCLIENT_PERCENTILE=${10:-90}
+MAX_TRIALS=${12:-40}
+
+ARGS="\
+ --max-threads ${MAX_CLIENT_THREADS} \
+ -m ${MODEL_NAME} \
+ -x ${MODEL_VERSION} \
+ -p 3000 \
+ --async \
+ --concurrency-range 4:${MAX_CONCURRENCY}:2 \
+ -r ${MAX_TRIALS} \
+ -v \
+ -i HTTP \
+ -u ${SERVER_HOST} \
+ -b ${BATCH_SIZE} \
+ -l ${MAX_LATENCY} \
+ -z \
+ --percentile=${PERFCLIENT_PERCENTILE}"
+
+echo "Using args: $(echo "$ARGS" | sed -e 's/ -/\n-/g')"
+
+/workspace/install/bin/perf_client $ARGS -f perf.csv
\ No newline at end of file
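The script reads its settings positionally; a sketch of an invocation from inside the client pod, with illustrative values (the host and model name are assumptions based on the rest of this deployment):

```bash
# host:port           model name                   seq_len batch max_latency_ms
bash perf_query.sh 34.83.65.105:80 bert_base_trt_gpu_seqlen128 128 2 5000
```

Unset trailing arguments fall back to the defaults above.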
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
new file mode 100644
index 0000000000..7339361528
--- /dev/null
+++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
@@ -0,0 +1,42 @@
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+kind: Pod
+metadata:
+ labels:
+ app: nv-triton-client
+ name: nv-triton-client
+ namespace: default
+spec:
+ containers:
+ - image: nvcr.io/nvidia/tritonserver:24.03-py3-sdk
+ imagePullPolicy: Always
+ name: nv-triton-client
+ securityContext:
+ privileged: true
+ command: [ "/bin/bash", "-c", "--" ]
+ args: [ "while true; do sleep 30; done;" ]
diff --git a/deploy/gke-marketplace-app/client-sample/bert_request.json b/deploy/gke-marketplace-app/client-sample/bert_request.json
new file mode 100644
index 0000000000..ce4b956db6
--- /dev/null
+++ b/deploy/gke-marketplace-app/client-sample/bert_request.json
@@ -0,0 +1,27 @@
+{
+ "inputs": [{
+ "name": "input_ids",
+ "shape": [1, 128],
+ "datatype": "INT32",
+ "parameters": {},
+ "data": [101, 2054, 2003, 23435, 5339, 1029, 102, 23435, 5339, 2003, 1037, 2152, 2836, 2784, 4083, 28937, 4132, 2008, 18058, 2659, 2397, 9407, 1998, 2152, 2083, 18780, 2005, 18726, 2107, 2004, 16755, 2545, 1010, 4613, 1998, 3746, 1013, 2678, 2006, 1050, 17258, 2401, 14246, 2271, 1012, 2009, 2950, 11968, 8043, 2015, 2000, 12324, 4275, 1010, 1998, 13354, 7076, 2000, 2490, 3117, 23092, 1998, 9014, 2077, 11243, 20600, 2015, 2005, 28937, 1012, 2651, 1050, 17258, 2401, 2003, 2330, 1011, 14768, 6129, 11968, 8043, 2015, 1998, 13354, 7076, 1999, 23435, 5339, 2061, 2008, 1996, 2784, 4083, 2451, 2064, 7661, 4697, 1998, 7949, 2122, 6177, 2000, 2202, 5056, 1997, 3928, 23435, 5339, 20600, 2015, 2005, 2115, 18726, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ }, {
+ "name": "input_mask",
+ "shape": [1, 128],
+ "datatype": "INT32",
+ "parameters": {},
+ "data": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ }, {
+ "name": "segment_ids",
+ "shape": [1, 128],
+ "datatype": "INT32",
+ "parameters": {},
+ "data": [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+ }],
+ "outputs": [{
+ "name": "cls_squad_logits",
+ "parameters": {
+ "binary_data": false
+ }
+ }]
+}
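The payload follows the KServe/Triton v2 inference protocol, so it can be posted directly to the model's infer endpoint. A sketch, assuming the ingress address is exported as `INGRESS_HOST` and the model is served under the name `bert` (as in the Locust script below):

```bash
curl -s -X POST "http://${INGRESS_HOST}/v2/models/bert/infer" \
     -H "Content-Type: application/json" \
     -d @bert_request.json
```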
diff --git a/deploy/gke-marketplace-app/client-sample/locustfile_bert.py b/deploy/gke-marketplace-app/client-sample/locustfile_bert.py
new file mode 100755
index 0000000000..aae8c69f43
--- /dev/null
+++ b/deploy/gke-marketplace-app/client-sample/locustfile_bert.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+
+from locust import HttpUser, LoadTestShape, between, task
+
+
+class ProfileLoad(LoadTestShape):
+ """
+ This load profile starts at 0 and steps up by step_users
+ increments every tick, up to target_users. After reaching
+ target_user level, load will stay at target_user level
+ until time_limit is reached.
+ """
+
+ target_users = 1000
+ step_users = 50 # ramp users each step
+ time_limit = 3600 # seconds
+
+ def tick(self):
+ num_steps = self.target_users / self.step_users
+ run_time = round(self.get_run_time())
+
+ if run_time < self.time_limit:
+ if run_time < num_steps:
+ # still ramping: add step_users for each elapsed tick (roughly one per second)
+ user_count = run_time * self.step_users
+ else:
+ user_count = self.target_users
+ return (user_count, self.step_users)
+ else:
+ return None
+
+
+class TritonUser(HttpUser):
+ wait_time = between(0.2, 0.2)
+
+ @task()
+ def bert(self):
+ response = self.client.post(self.url1, data=json.dumps(self.data))
+
+ def on_start(self):
+ with open("bert_request.json") as f:
+ self.data = json.load(f)
+
+ self.url1 = "{}/v2/models/{}/infer".format(self.environment.host, "bert")
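Because the LoadTestShape controls the user count, Locust can be started headless without specifying users or a spawn rate. A sketch, assuming `bert_request.json` is in the working directory and the ingress host is known:

```bash
pip install locust   # if not already installed
locust -f locustfile_bert.py --headless --host "http://${INGRESS_HOST}"
```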
diff --git a/deploy/gke-marketplace-app/client-sample/perf_analyzer_grpc.sh b/deploy/gke-marketplace-app/client-sample/perf_analyzer_grpc.sh
new file mode 100755
index 0000000000..ae5476f338
--- /dev/null
+++ b/deploy/gke-marketplace-app/client-sample/perf_analyzer_grpc.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+SERVER_HOST=${1:-"${INGRESS_HOST}:${INGRESS_PORT}"} # update with the public ingress IP:port if not exported in the environment
+MODEL_VERSION=${2:-1}
+precision=${3:-"int8"}
+BATCH_SIZE=${4:-1}
+MAX_LATENCY=${5:-500}
+MAX_CLIENT_THREADS=${6:-6}
+MAX_CONCURRENCY=${7:-20}
+MODEL_NAME=${8:-"bert"}
+SEQ_LENGTH=${9:-"128"}
+PERFCLIENT_PERCENTILE=${10:-90}
+STABILITY_PERCENTAGE=${11:-0.01}
+MAX_TRIALS=${12:-1000000}
+
+ARGS="\
+ --max-threads ${MAX_CLIENT_THREADS} \
+ -m ${MODEL_NAME} \
+ -x ${MODEL_VERSION} \
+ -p 1000 \
+ -t ${MAX_CONCURRENCY} \
+ -s ${STABILITY_PERCENTAGE} \
+ -r ${MAX_TRIALS} \
+ -v \
+ -i gRPC \
+ -u ${SERVER_HOST} \
+ -b ${BATCH_SIZE} \
+ -l ${MAX_LATENCY} \
+ -z \
+ --shape=input_ids:${SEQ_LENGTH} \
+ --shape=segment_ids:${SEQ_LENGTH} \
+ --shape=input_mask:${SEQ_LENGTH} \
+ --percentile=${PERFCLIENT_PERCENTILE}"
+
+echo "Using args: $(echo "$ARGS" | sed -e 's/ -/\n-/g')"
+
+/workspace/install/bin/perf_client $ARGS
diff --git a/deploy/gke-marketplace-app/client.png b/deploy/gke-marketplace-app/client.png
new file mode 100644
index 0000000000..1fe3dbe7d5
Binary files /dev/null and b/deploy/gke-marketplace-app/client.png differ
diff --git a/deploy/gke-marketplace-app/diagram.png b/deploy/gke-marketplace-app/diagram.png
new file mode 100644
index 0000000000..7592672e94
Binary files /dev/null and b/deploy/gke-marketplace-app/diagram.png differ
diff --git a/deploy/gke-marketplace-app/server-deployer/Dockerfile b/deploy/gke-marketplace-app/server-deployer/Dockerfile
new file mode 100644
index 0000000000..5bb34adc65
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/Dockerfile
@@ -0,0 +1,28 @@
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+FROM gcr.io/cloud-marketplace-tools/k8s/deployer_helm/onbuild
+
diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
new file mode 100755
index 0000000000..8114dbe6f8
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
+export APP_NAME=tritonserver
+export MAJOR_VERSION=2.41
+export MINOR_VERSION=2.44.0
+export NGC_VERSION=24.03-py3
+
+docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
+
+docker tag nvcr.io/nvidia/$APP_NAME:$NGC_VERSION $REGISTRY/$APP_NAME:$MAJOR_VERSION
+docker tag nvcr.io/nvidia/$APP_NAME:$NGC_VERSION $REGISTRY/$APP_NAME:$MINOR_VERSION
+docker tag nvcr.io/nvidia/$APP_NAME:$NGC_VERSION $REGISTRY/$APP_NAME:$NGC_VERSION
+
+docker push $REGISTRY/$APP_NAME:$MINOR_VERSION
+docker push $REGISTRY/$APP_NAME:$MAJOR_VERSION
+docker push $REGISTRY/$APP_NAME:$NGC_VERSION
+
+docker build --tag $REGISTRY/$APP_NAME/deployer .
+
+docker tag $REGISTRY/$APP_NAME/deployer $REGISTRY/$APP_NAME/deployer:$MAJOR_VERSION
+docker tag $REGISTRY/$APP_NAME/deployer $REGISTRY/$APP_NAME/deployer:$MINOR_VERSION
+docker push $REGISTRY/$APP_NAME/deployer:$MAJOR_VERSION
+docker push $REGISTRY/$APP_NAME/deployer:$MINOR_VERSION
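The script assumes Docker is already authorized against both registries; a hedged pre-flight sketch (credentials and project are whatever your environment uses):

```bash
gcloud auth login                # or activate a service account
gcloud auth configure-docker     # allow docker to push to gcr.io
docker login nvcr.io             # NGC credentials, if required to pull the Triton image
```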
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
new file mode 100644
index 0000000000..73590f2ea0
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
@@ -0,0 +1,31 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+appVersion: "2.41"
+description: Triton Inference Server
+name: triton-inference-server
+version: 2.44.0
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/logo.png b/deploy/gke-marketplace-app/server-deployer/chart/triton/logo.png
new file mode 100644
index 0000000000..9c70ab77fb
Binary files /dev/null and b/deploy/gke-marketplace-app/server-deployer/chart/triton/logo.png differ
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/_helpers.tpl b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/_helpers.tpl
new file mode 100644
index 0000000000..cd4ef9264a
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/_helpers.tpl
@@ -0,0 +1,60 @@
+{{/*
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/}}
+
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "triton-inference-server.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "triton-inference-server.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "triton-inference-server.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/application.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/application.yaml
new file mode 100644
index 0000000000..28bfbf08c4
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/application.yaml
@@ -0,0 +1,68 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+{{ if and .Values.gcpMarketplace (eq .Values.gcpMarketplace true) }}
+---
+apiVersion: app.k8s.io/v1beta1
+kind: Application
+metadata:
+ name: "{{ .Release.Name }}"
+ annotations:
+ kubernetes-engine.cloud.google.com/icon: >-
+ data:image/png;base64,{{ .Files.Get "logo.png" | b64enc }}
+ marketplace.cloud.google.com/deploy-info: '{"partner_id": "nvidia", "product_id": "triton", "partner_name": "NVIDIA"}'
+ labels:
+ app.kubernetes.io/name: "{{ .Release.Name }}"
+spec:
+ descriptor:
+ type: Triton
+ version: "{{ .Values.publishedVersion }}"
+ description: |-
+ Triton Inference Server provides a cloud and edge inferencing solution
+ optimized for both CPUs and GPUs. Triton supports an HTTP/REST and GRPC
+ protocol that allows remote clients to request inferencing for any model
+ being managed by the server.
+
+ notes: |-
+
+ Send requests to the Triton server through the external IP address of the
+ "ingress-triton" ingress, i.e. http://IP:80/v2/models/{model}/infer
+
+ Links:
+ - [NVIDIA Triton page](https://developer.nvidia.com/nvidia-triton-inference-server)
+ - [Documentation](https://github.com/triton-inference-server/server)
+
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: "{{ .Release.Name }}"
+ componentKinds:
+ - group: apps/v1
+ kind: Deployment
+ - group: v1
+ kind: Service
+ - group: autoscaling/v2
+ kind: HorizontalPodAutoscaler
+{{ end }}
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/deployment.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/deployment.yaml
new file mode 100644
index 0000000000..75ac1aee81
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/deployment.yaml
@@ -0,0 +1,93 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ template "triton-inference-server.name" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ replicas: {{ .Values.initReplicaCount }}
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+ template:
+ metadata:
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+
+ spec:
+ containers:
+ - name: {{ .Chart.Name }}
+ image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+
+ resources:
+ limits:
+ nvidia.com/gpu: {{ .Values.image.numGpus }}
+ env:
+ - name: LD_PRELOAD
+ value: {{ .Values.image.ldPreloadPath }}
+ args: ["tritonserver", "--model-store={{ .Values.modelRepositoryPath }}",
+ "--strict-model-config={{ .Values.image.strictModelConfig }}",
+ "--log-verbose={{ .Values.image.logVerboseLevel }}",
+ "--allow-gpu-metrics={{ .Values.image.allowGPUMetrics }}"]
+
+ ports:
+ - containerPort: 8000
+ name: http
+ - containerPort: 8001
+ name: grpc
+ - containerPort: 8002
+ name: metrics
+ livenessProbe:
+ httpGet:
+ path: /v2/health/live
+ port: http
+ initialDelaySeconds: {{ .Values.deployment.livenessProbe.initialDelaySeconds }}
+ periodSeconds: {{ .Values.deployment.livenessProbe.periodSeconds }}
+ timeoutSeconds: {{ .Values.deployment.livenessProbe.timeoutSeconds }}
+ successThreshold: {{ .Values.deployment.livenessProbe.successThreshold }}
+ failureThreshold: {{ .Values.deployment.livenessProbe.failureThreshold }}
+ readinessProbe:
+ httpGet:
+ path: /v2/health/ready
+ port: http
+ initialDelaySeconds: {{ .Values.deployment.readinessProbe.initialDelaySeconds }}
+ periodSeconds: {{ .Values.deployment.readinessProbe.periodSeconds }}
+ timeoutSeconds: {{ .Values.deployment.readinessProbe.timeoutSeconds }}
+ successThreshold: {{ .Values.deployment.readinessProbe.successThreshold }}
+ failureThreshold: {{ .Values.deployment.readinessProbe.failureThreshold }}
+
+ securityContext:
+ runAsUser: 1000
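The probes hit Triton's standard health endpoints, so the same checks can be run by hand once a pod is up; a sketch, assuming the service is reachable at `INGRESS_HOST`:

```bash
# 200 indicates live/ready; anything else means the server is still loading or unhealthy
curl -s -o /dev/null -w "%{http_code}\n" "http://${INGRESS_HOST}/v2/health/live"
curl -s -o /dev/null -w "%{http_code}\n" "http://${INGRESS_HOST}/v2/health/ready"
```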
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/hpa.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/hpa.yaml
new file mode 100644
index 0000000000..89275ea7de
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/hpa.yaml
@@ -0,0 +1,49 @@
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+ name: triton-hpa
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: triton-hpa
+spec:
+ minReplicas: {{ .Values.minReplicaCount }}
+ maxReplicas: {{ .Values.maxReplicaCount }}
+ metrics:
+ - type: External
+ external:
+ metric:
+ name: kubernetes.io|container|accelerator|duty_cycle
+ target:
+ type: AverageValue
+ averageValue: {{ .Values.HPATargetAverageValue }}
+
+ scaleTargetRef:
+ apiVersion: apps/v1
+ kind: Deployment
+ name: {{ template "triton-inference-server.name" . }}
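After installation, the autoscaler's view of the duty-cycle metric can be inspected with kubectl; a sketch, assuming the release was deployed into the default namespace:

```bash
kubectl get hpa triton-hpa --namespace default
kubectl describe hpa triton-hpa --namespace default   # shows current vs. target metric value
```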
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/ingress.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/ingress.yaml
new file mode 100644
index 0000000000..2b6da5fe18
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/ingress.yaml
@@ -0,0 +1,48 @@
+# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+ name: triton-external
+ annotations:
+ kubernetes.io/ingress.class: "gce"
+ kubernetes.io/ingress.global-static-ip-name: "ingress-triton"
+spec:
+ rules:
+ - http:
+ paths:
+ - path: "/"
+ pathType: Prefix
+ backend:
+ service:
+ name: triton-inference-server
+ port:
+ {{ if eq .Values.tritonProtocol "gRPC" }}
+ number: 8001
+ {{ else }}
+ number: 8000
+ {{ end }}
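The `kubernetes.io/ingress.global-static-ip-name` annotation assumes a global static IP named `ingress-triton` has already been reserved; it could be created along these lines:

```bash
gcloud compute addresses create ingress-triton --global
gcloud compute addresses describe ingress-triton --global --format='value(address)'
```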
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/service.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/service.yaml
new file mode 100644
index 0000000000..93ef6f9da3
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/templates/service.yaml
@@ -0,0 +1,55 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server.name" . }}
+ namespace: {{ .Release.Namespace }}
+ annotations:
+ cloud.google.com/neg: '{"ingress": true}'
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ type: {{ .Values.service.type }}
+ ports:
+ - port: 8000
+ targetPort: http
+ name: http-inference-server
+ - port: 8001
+ targetPort: grpc
+ name: grpc-inference-server
+ - port: 8002
+ targetPort: metrics
+ name: metrics-inference-server
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+
+
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
new file mode 100644
index 0000000000..3e5eac70b5
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
@@ -0,0 +1,66 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+initReplicaCount: 1
+minReplicaCount: 1
+maxReplicaCount: 3
+# choose between gRPC and HTTP
+tritonProtocol: HTTP
+# HPA GPU utilization autoscaling target
+HPATargetAverageValue: 85
+modelRepositoryPath: gs://triton_sample_models/24_03
+publishedVersion: '2.44.0'
+gcpMarketplace: true
+
+image:
+ registry: gcr.io
+ repository: nvidia-ngc-public/tritonserver
+ tag: 24.03-py3
+ pullPolicy: IfNotPresent
+ # modify the model repository here to match your GCP storage bucket
+ numGpus: 1
+ strictModelConfig: False
+ # optional custom library (e.g. custom ops used by the model) to preload via LD_PRELOAD
+ ldPreloadPath: ''
+ logVerboseLevel: 0
+ allowGPUMetrics: True
+
+service:
+ type: NodePort
+
+deployment:
+ livenessProbe:
+ failureThreshold: 60
+ initialDelaySeconds: 10
+ periodSeconds: 5
+ successThreshold: 1
+ timeoutSeconds: 1
+ readinessProbe:
+ failureThreshold: 60
+ initialDelaySeconds: 10
+ periodSeconds: 5
+ successThreshold: 1
+ timeoutSeconds: 1
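Outside the GCP Marketplace flow, these values can also be overridden directly with Helm. A sketch, assuming it is run from the repository root and `gs://my-bucket/models` is a placeholder for your own bucket:

```bash
helm install triton ./deploy/gke-marketplace-app/server-deployer/chart/triton \
  --set gcpMarketplace=false \
  --set modelRepositoryPath=gs://my-bucket/models
```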
diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
new file mode 100644
index 0000000000..9fd8cbe1c4
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
@@ -0,0 +1,123 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+x-google-marketplace:
+ schemaVersion: v2
+ applicationApiVersion: v1beta1
+ publishedVersion: '2.44.0'
+ publishedVersionMetadata:
+ releaseNote: >-
+ Initial release.
+ releaseTypes:
+ - Feature
+ recommended: true
+
+ clusterConstraints:
+ k8sVersion: ">=1.18.7"
+ assistedClusterCreation:
+ type: DISABLED
+ creationGuidance: GKE currently doesn't support auto-creating GPU clusters; please refer to the Triton GKE Marketplace Deployer instructions to manually create a GKE cluster >= 1.18.7 and add GPU node pools
+ resources:
+ - requests:
+ gpu:
+ nvidia.com/gpu: {}
+ istio:
+ type: REQUIRED
+
+ images:
+ '':
+ properties:
+ triton.image.registry:
+ type: REGISTRY
+ triton.image.repository:
+ type: REPO_WITHOUT_REGISTRY
+ triton.image.tag:
+ type: TAG
+
+properties:
+ name:
+ type: string
+ x-google-marketplace:
+ type: NAME
+ namespace:
+ type: string
+ x-google-marketplace:
+ type: NAMESPACE
+ initReplicaCount:
+ title: Initial number of Triton pod instances to deploy.
+ type: integer
+ default: 1
+ minReplicaCount:
+ title: Minimum number of Triton pod instances in the deployment for autoscaling.
+ type: integer
+ default: 1
+ maxReplicaCount:
+ title: Maximum number of Triton pod instances in the deployment for autoscaling.
+ type: integer
+ default: 3
+ tritonProtocol:
+ title: Request protocol for sending data to Triton; choose either gRPC or HTTP.
+ type: string
+ default: HTTP
+ HPATargetAverageValue:
+ title: HPA autoscaling target. GKE currently supports Duty Cycle (GPU utilization); when the target is exceeded, the Triton service adds another pod instance. Analyze your model's inference profile to pick a GPU metric target that meets your latency requirement, and leave some headroom to absorb transient load. If you want to customize the autoscaling metric, GPU power (percentage of power), queue time, or SLA measurements such as latency are recommended alternatives.
+ type: integer
+ default: 85
+ modelRepositoryPath:
+ type: string
+ title: Bucket where models are stored. Make sure the user/service account that creates the GKE app has permission to access this GCS bucket. See the Triton documentation for model configuration and repository layout details; TensorRT, TensorFlow, PyTorch, ONNX, and other backends are supported.
+ default: gs://triton_sample_models/models
+ image.ldPreloadPath:
+ type: string
+ title: Leave this empty by default. Triton lets users add custom backend layers such as TensorRT plugins or TensorFlow custom ops; the compiled shared library must be provided via the LD_PRELOAD environment variable.
+ default: ''
+ image.logVerboseLevel:
+ type: integer
+ title: Set the verbose logging level. Zero (0) disables verbose logging and values >= 1 enable it; this is helpful when you are unsure whether a model is compatible with Triton, or for general debugging.
+ default: 0
+ image.strictModelConfig:
+ type: boolean
+ title: Leave this unchecked by default. When strictModelConfig is unchecked (False), Triton tries to infer the model configuration from the model file; when checked (True), you must provide a config.pbtxt in the model repository.
+ default: False
+ image.allowGPUMetrics:
+ type: boolean
+ title: Selected by default. When using A100 MIG, unselect this to disable the GPU memory metrics reported by Triton, since GPU metrics are not currently supported on A100 MIG.
+ default: True
+ istioEnabled:
+ type: boolean
+ x-google-marketplace:
+ type: ISTIO_ENABLED
+ default: True
+
+
+required:
+- name
+- namespace
+- modelRepositoryPath
+
+form:
+- widget: help
+ description: GKE currently doesn't support auto-creating GPU clusters; please refer to the Triton GKE Marketplace Deployer instructions to manually create a GKE cluster >= 1.18.7 and add GPU node pools. Also refer to the Triton GitHub page for product information.
diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml
new file mode 100644
index 0000000000..0efdef3e72
--- /dev/null
+++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -0,0 +1,123 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+x-google-marketplace:
+ schemaVersion: v2
+ applicationApiVersion: v1beta1
+ publishedVersion: '2.44.0'
+ publishedVersionMetadata:
+ releaseNote: >-
+ Initial release.
+ releaseTypes:
+ - Feature
+ recommended: true
+
+ clusterConstraints:
+ k8sVersion: ">=1.18.7"
+ assistedClusterCreation:
+ type: DISABLED
+ creationGuidance: GKE currently doesn't support auto-creating GPU clusters; please refer to the Triton GKE Marketplace Deployer instructions to manually create a GKE cluster >= 1.18.7 and add GPU node pools
+ resources:
+ - requests:
+ gpu:
+ nvidia.com/gpu: {}
+ istio:
+ type: REQUIRED
+
+ images:
+ '':
+ properties:
+ triton.image.registry:
+ type: REGISTRY
+ triton.image.repository:
+ type: REPO_WITHOUT_REGISTRY
+ triton.image.tag:
+ type: TAG
+
+properties:
+ name:
+ type: string
+ x-google-marketplace:
+ type: NAME
+ namespace:
+ type: string
+ x-google-marketplace:
+ type: NAMESPACE
+ initReplicaCount:
+ title: Initial number of Triton pod instances to deploy.
+ type: integer
+ default: 1
+ minReplicaCount:
+ title: Minimum number of Triton pod instances in the deployment for autoscaling.
+ type: integer
+ default: 1
+ maxReplicaCount:
+ title: Maximum number of Triton pod instances in the deployment for autoscaling.
+ type: integer
+ default: 3
+ tritonProtocol:
+ title: Request protocol for sending data to Triton; choose either gRPC or HTTP.
+ type: string
+ default: HTTP
+ HPATargetAverageValue:
+ title: HPA autoscaling target. GKE currently supports Duty Cycle (GPU utilization); when the target is exceeded, the Triton service adds another pod instance. Analyze your model's inference profile to pick a GPU metric target that meets your latency requirement, and leave some headroom to absorb transient load. If you want to customize the autoscaling metric, GPU power (percentage of power), queue time, or SLA measurements such as latency are recommended alternatives.
+ type: integer
+ default: 85
+ modelRepositoryPath:
+ type: string
+ title: Bucket where models are stored. Make sure the user/service account that creates the GKE app has permission to access this GCS bucket. See the Triton documentation for model configuration and repository layout details; TensorRT, TensorFlow, PyTorch, ONNX, and other backends are supported.
+ default: gs://triton_sample_models/24_03
+ image.ldPreloadPath:
+ type: string
+ title: Leave this empty by default. Triton lets users add custom backend layers such as TensorRT plugins or TensorFlow custom ops; the compiled shared library must be provided via the LD_PRELOAD environment variable.
+ default: ''
+ image.logVerboseLevel:
+ type: integer
+ title: Set the verbose logging level. Zero (0) disables verbose logging and values >= 1 enable it; this is helpful when you are unsure whether a model is compatible with Triton, or for general debugging.
+ default: 0
+ image.strictModelConfig:
+ type: boolean
+ title: Leave this unchecked by default. When strictModelConfig is unchecked (False), Triton tries to infer the model configuration from the model file; when checked (True), you must provide a config.pbtxt in the model repository.
+ default: False
+ image.allowGPUMetrics:
+ type: boolean
+ title: Selected by default. When using A100 MIG, unselect this to disable the GPU memory metrics reported by Triton, since GPU metrics are not currently supported on A100 MIG.
+ default: True
+ istioEnabled:
+ type: boolean
+ x-google-marketplace:
+ type: ISTIO_ENABLED
+ default: True
+
+
+required:
+- name
+- namespace
+- modelRepositoryPath
+
+form:
+- widget: help
+ description: GKE currently doesn't support auto-creating GPU clusters; please refer to the Triton GKE Marketplace Deployer instructions to manually create a GKE cluster >= 1.18.7 and add GPU node pools. Also refer to the Triton GitHub page for product information.
diff --git a/deploy/gke-marketplace-app/trt-engine/README.md b/deploy/gke-marketplace-app/trt-engine/README.md
new file mode 100644
index 0000000000..fd9ad2e0a5
--- /dev/null
+++ b/deploy/gke-marketplace-app/trt-engine/README.md
@@ -0,0 +1,63 @@
+
+
+# Instructions to create the BERT engine for each Triton update
+
+## Description
+
+The commands below build an INT8 BERT-large TensorRT engine inside the TensorRT
+container and copy it into the GCS model repository used by the GKE app.
+
+```
+docker run --gpus all -it --network host \
+ --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
+ -v ~:/scripts nvcr.io/nvidia/tensorrt:24.03-py3
+
+pip install onnx six torch tf2onnx tensorflow
+
+git clone -b main https://github.com/NVIDIA/TensorRT.git
+cd TensorRT
+git submodule update --init --recursive
+
+export TRT_OSSPATH=/workspace/TensorRT
+export TRT_LIBPATH=/lib/x86_64-linux-gnu
+
+pushd /usr/local/bin && wget https://ngc.nvidia.com/downloads/ngccli_cat_linux.zip && unzip ngccli_cat_linux.zip && chmod u+x ngc-cli/ngc && rm ngccli_cat_linux.zip ngc-cli.md5 && ln -s ngc-cli/ngc ngc && echo -e "no-apikey\nascii\n" | ngc config set
+
+popd
+
+cd /workspace/TensorRT/demo/BERT
+bash ./scripts/download_squad.sh
+bash ./scripts/download_model.sh large 128
+# bash ./scripts/download_model.sh large 384
+
+mkdir -p engines
+
+python3 builder.py -m models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/model.ckpt -o engines/bert_large_int8_bs1_s128.engine -b 1 -s 128 -c models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/ -v models/fine-tuned/bert_tf_ckpt_large_qa_squad2_amp_128_v19.03.1/vocab.txt --int8 --fp16 --strict --calib-num 1 -iln -imh
+
+gsutil cp engines/bert_large_int8_bs1_s128.engine gs://triton_sample_models/24_03/bert/1/model.plan
+```
+
+For each Triton upgrade, the container version used to generate the model and the model path in GCS (`gs://triton_sample_models/24_03/`) should be updated to match the new version.
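Triton expects one directory per model with numeric version subdirectories, so a quick sanity check of the bucket layout after the copy above might look like this (a sketch):

```bash
gsutil ls -r gs://triton_sample_models/24_03/bert/
# expected to include:
# gs://triton_sample_models/24_03/bert/1/model.plan
```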
diff --git a/deploy/gke-marketplace-app/ui.png b/deploy/gke-marketplace-app/ui.png
new file mode 100644
index 0000000000..7afec326ee
Binary files /dev/null and b/deploy/gke-marketplace-app/ui.png differ
diff --git a/deploy/k8s-onprem/Chart.yaml b/deploy/k8s-onprem/Chart.yaml
new file mode 100644
index 0000000000..92830bc297
--- /dev/null
+++ b/deploy/k8s-onprem/Chart.yaml
@@ -0,0 +1,44 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v2
+appVersion: "1.0"
+description: Triton Inference Server
+name: triton-inference-server
+version: 1.0.0
+dependencies:
+ - name: traefik
+ version: "~10.6.2"
+ repository: "https://helm.traefik.io/traefik"
+ tags:
+ - loadBalancing
+ - name: prometheus-adapter
+ version: "~3.0.0"
+ repository: "https://prometheus-community.github.io/helm-charts"
+ tags:
+ - autoscaling
+
+
diff --git a/deploy/k8s-onprem/README.md b/deploy/k8s-onprem/README.md
new file mode 100644
index 0000000000..4287b23c35
--- /dev/null
+++ b/deploy/k8s-onprem/README.md
@@ -0,0 +1,329 @@
+
+
+[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
+
+# Kubernetes Deploy: NVIDIA Triton Inference Server Cluster
+
+This repository includes a Helm chart and instructions for installing NVIDIA Triton
+Inference Server in an on-premises or AWS EC2 Kubernetes cluster. You can also use this
+repository to enable load balancing and autoscaling for your Triton cluster.
+
+This guide assumes you already have a functional Kubernetes cluster with support for GPUs.
+See the [NVIDIA GPU Operator documentation](https://docs.nvidia.com/datacenter/cloud-native/kubernetes/install-k8s.html)
+for instructions on how to install Kubernetes and enable GPU access in your Kubernetes cluster.
+You must also have Helm installed (see [Installing Helm](#installing-helm) for instructions). Note the following requirements:
+
+* To deploy Prometheus and Grafana to collect and display Triton metrics, your cluster must contain sufficient CPU resources to support these services.
+
+* To use GPUs for inferencing, your cluster must be configured to contain the desired number of GPU nodes, with
+support for the NVIDIA driver and CUDA version required by the version
+of the inference server you are using.
+
+* To enable autoscaling, your cluster's kube-apiserver must have the [aggregation layer
+enabled](https://kubernetes.io/docs/tasks/extend-kubernetes/configure-aggregation-layer/).
+This allows the horizontal pod autoscaler to read custom metrics from the Prometheus adapter (see the checks after this list).
+
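+The GPU and aggregation layer requirements above can be sanity-checked with generic kubectl queries (standard kubectl commands, not specific to this chart):
+
+```
+# Nodes should advertise allocatable nvidia.com/gpu resources.
+kubectl describe nodes | grep -i "nvidia.com/gpu"
+
+# The aggregation layer should be serving aggregated metrics API services.
+kubectl get apiservices | grep metrics
+```
+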
+This Helm chart is available from [Triton Inference Server
+GitHub.](https://github.com/triton-inference-server/server)
+
+For more information on Helm and Helm charts, visit the [Helm documentation](https://helm.sh/docs/).
+
+## Quickstart
+
+First, clone this repository to a local machine. Then, execute the following commands:
+
+Install Helm
+
+```
+$ curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
+$ chmod 700 get_helm.sh
+$ ./get_helm.sh
+```
+
+Deploy Prometheus and Grafana
+
+```
+$ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+$ helm repo update
+$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false prometheus-community/kube-prometheus-stack
+```
+
+Deploy Triton with default settings
+
+```
+helm install example ./deploy/k8s-onprem
+```
+
+
+
+
+
+## Installing Helm
+
+### Helm v3
+
+If you do not already have Helm installed in your Kubernetes cluster,
+executing the following steps from the [official Helm install
+guide](https://helm.sh/docs/intro/install/) will
+give you a quick setup.
+
+If you are currently using Helm v2 and would like to migrate to Helm v3,
+see the [official migration guide](https://helm.sh/docs/topics/v2_v3_migration/).
+
+## Model Repository
+If you already have a model repository, you may use that with this Helm
+chart. If you do not have a model repository, you can check out a local
+copy of the server source repository to create an example
+model repository:
+
+```
+$ git clone https://github.com/triton-inference-server/server.git
+```
+
+Triton Server needs a repository of models that it will make available
+for inferencing. For this example, we are using an existing NFS server and
+placing our model files there. See the
+[Model Repository documentation](../../docs/user_guide/model_repository.md) for other
+supported locations.
+
+Following the [QuickStart](../../docs/getting_started/quickstart.md), download the
+example model repository to your system and copy it onto your NFS server.
+Then, add the URL or IP address of your NFS server and the server path of your
+model repository to `values.yaml`.
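+
+As a minimal sketch of that step (following the QuickStart; the NFS host below is a placeholder, and `/srv/models` matches the chart's default `modelRepositoryPath`):
+
+```
+# Fetch the example models and stage them on the NFS export used by the chart.
+cd server/docs/examples
+./fetch_models.sh
+scp -r model_repository/* <nfs-server>:/srv/models/
+```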
+
+
+## Deploy Prometheus and Grafana
+
+The inference server metrics are collected by Prometheus and viewable
+through Grafana. The inference server Helm chart assumes that Prometheus
+and Grafana are available, so this step must be followed even if you
+do not want to use Grafana.
+
+Use the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) Helm chart to install these components. The
+*serviceMonitorSelectorNilUsesHelmValues* flag is needed so that
+Prometheus can find the inference server metrics in the *example*
+release deployed in a later section.
+
+```
+$ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+$ helm repo update
+$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false prometheus-community/kube-prometheus-stack
+```
+
+Then port-forward to the Grafana service so you can access it from
+your local browser.
+
+```
+$ kubectl port-forward service/example-metrics-grafana 8080:80
+```
+
+Now you should be able to navigate in your browser to localhost:8080
+and see the Grafana login page. Use username=admin and
+password=prom-operator to log in.
+
+An example Grafana dashboard is available in dashboard.json. Use the
+import function in Grafana to import and view this dashboard.
+
+## Enable Autoscaling
+To enable autoscaling, ensure that the autoscaling tag in `values.yaml` is set to `true`.
+This will do two things:
+
+1. Deploy a Horizontal Pod Autoscaler that will scale replicas of the triton-inference-server
+based on the information included in `values.yaml`.
+
+2. Install the [prometheus-adapter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-adapter) Helm chart, allowing the Horizontal Pod Autoscaler to scale
+based on custom metrics from Prometheus.
+
+The included configuration will scale Triton pods based on the average queue time,
+as described in [this blog post](https://developer.nvidia.com/blog/deploying-nvidia-triton-at-scale-with-mig-and-kubernetes/#:~:text=Query%20NVIDIA%20Triton%20metrics%20using%20Prometheus). To customize this,
+you may replace or add to the list of custom rules in `values.yaml`. If you change
+the custom metric, be sure to also change the values in `autoscaling.metrics`.
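+
+For example, a hedged sketch of raising the scaling target at install time; the value is the average queue time per request in microseconds, matching the `avg_time_queue_us` metric defined in `values.yaml` (100 is only an illustration):
+
+```
+# Scale out only when the average queue time per request exceeds 100 microseconds.
+helm install example ./deploy/k8s-onprem \
+    --set 'autoscaling.metrics[0].pods.target.averageValue=100'
+```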
+
+If autoscaling is disabled, the number of Triton server pods is fixed at the
+`autoscaling.minReplicas` value in `values.yaml`.
+
+## Enable Load Balancing
+To enable load balancing, ensure that the loadBalancing tag in `values.yaml`
+is set to `true`. This will do two things:
+
+1. Deploy a Traefik reverse proxy through the [Traefik Helm Chart](https://github.com/traefik/traefik-helm-chart).
+
+2. Configure two Traefik [IngressRoutes](https://doc.traefik.io/traefik/providers/kubernetes-crd/),
+one for http and one for grpc. This will allow the Traefik service to expose two
+ports that will be forwarded to and balanced across the Triton pods.
+
+To choose the port numbers exposed, or to disable either http or grpc, edit the
+configured variables in `values.yaml`.
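+
+For example, assuming the default Traefik settings shipped in this chart's `values.yaml`, the ports can also be overridden at install time instead of editing the file:
+
+```
+# Expose HTTP on port 9000 and disable the gRPC entry point.
+helm install example ./deploy/k8s-onprem \
+    --set traefik.ports.triton-http.exposedPort=9000 \
+    --set traefik.ports.triton-grpc.expose=false
+```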
+
+## Deploy the Inference Server
+
+Deploy the inference server, autoscaler, and load balancer using the default
+configuration with the following commands.
+
+Here, and in the following commands, we use the name `example` for our chart.
+This name will be added to the beginning of all resources created during the Helm
+installation.
+
+```
+$ cd deploy/k8s-onprem
+$ helm install example .
+```
+
+Use kubectl to see status and wait until the inference server pods are
+running.
+
+```
+$ kubectl get pods
+NAME READY STATUS RESTARTS AGE
+example-triton-inference-server-5f74b55885-n6lt7 1/1 Running 0 2m21s
+```
+
+There are several ways of overriding the default configuration as
+described in this [Helm
+documentation](https://helm.sh/docs/using_helm/#customizing-the-chart-before-installing).
+
+You can edit the values.yaml file directly or you can use the *--set*
+option to override a single parameter with the CLI. For example, to
+deploy a cluster with a minimum of two inference servers, use *--set* to
+set the `autoscaling.minReplicas` parameter.
+
+```
+$ helm install example --set autoscaling.minReplicas=2 .
+```
+
+You can also write your own `config.yaml` file with the values you
+want to override and pass it to Helm. If you specify a `config.yaml` file, the
+values set there will override those in `values.yaml`.
+
+```
+$ cat << EOF > config.yaml
+namespace: MyCustomNamespace
+image:
+ imageName: nvcr.io/nvidia/tritonserver:custom-tag
+ modelRepositoryPath: gs://my_model_repository
+EOF
+$ helm install example -f config.yaml .
+```
+
+## Probe Configuration
+
+The file `templates/deployment.yaml` contains the configuration of the `livenessProbe`, `readinessProbe`, and `startupProbe` for the Triton server container.
+By default, Triton loads all models before starting the HTTP server that responds to the probes. This can take several minutes, depending on the sizes of the models.
+If loading does not complete within `startupProbe.failureThreshold * startupProbe.periodSeconds` seconds, Kubernetes treats the pod as failed and restarts it,
+which can result in an endless loop of restarting pods, so make sure these values are large enough for your use case.
+The liveness and readiness probes are only sent after the first success of the startup probe.
+
+For more details, see the [Kubernetes probe documentation](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) and the [feature page of the startup probe](https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/950-liveness-probe-holdoff/README.md).
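+
+For example, with the defaults in `templates/deployment.yaml` (`periodSeconds: 10`, `failureThreshold: 30`), Triton has 10 * 30 = 300 seconds to finish loading models. If pods keep restarting during startup, the probe events can help confirm the cause (the pod name below is a placeholder):
+
+```
+# Look for "Startup probe failed" events on a restarting Triton pod.
+kubectl describe pod <triton-pod-name> | grep -i -A 2 probe
+kubectl get events --sort-by=.lastTimestamp | grep -i probe
+```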
+
+## Using Triton Inference Server
+
+Now that the inference server is running, you can send HTTP or gRPC
+requests to it to perform inferencing. By default, this chart deploys [Traefik](https://traefik.io/)
+and uses [IngressRoutes](https://doc.traefik.io/traefik/providers/kubernetes-crd/)
+to balance requests across all available nodes.
+
+To send requests through the Traefik proxy, use the Cluster IP of the
+traefik service deployed by the Helm chart. In this case, it is 10.111.128.124.
+
+```
+$ kubectl get services
+NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+...
+example-traefik LoadBalancer 10.111.128.124 8001:31752/TCP,8000:31941/TCP,80:30692/TCP,443:30303/TCP 74m
+example-triton-inference-server ClusterIP None 8000/TCP,8001/TCP,8002/TCP 74m
+```
+
+Use the following command to refer to the Cluster IP:
+```
+cluster_ip=`kubectl get svc -l app.kubernetes.io/name=traefik -o=jsonpath='{.items[0].spec.clusterIP}'`
+```
+
+
+The Traefik reverse proxy exposes an HTTP endpoint on port 8000, a gRPC
+endpoint on port 8001, and a Prometheus metrics endpoint on
+port 8002. You can use curl to get the metadata of the inference server
+from the HTTP endpoint.
+
+```
+$ curl $cluster_ip:8000/v2
+```
+
+Follow the [QuickStart](../../docs/getting_started/quickstart.md) to get the example
+image classification client that can be used to perform inferencing
+using image classification models on the inference
+server. For example,
+
+```
+$ image_client -u $cluster_ip:8000 -m inception_graphdef -s INCEPTION -c3 mug.jpg
+Request 0, batch size 1
+Image 'images/mug.jpg':
+ 504 (COFFEE MUG) = 0.723992
+ 968 (CUP) = 0.270953
+ 967 (ESPRESSO) = 0.00115997
+```
+
+## Testing Load Balancing and Autoscaling
+After you have confirmed that your Triton cluster is operational and can perform inference,
+you can test the load balancing and autoscaling features by sending a heavy load of requests.
+One option for doing this is using the
+[perf_analyzer](https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/README.md)
+application.
+
+You can apply a progressively increasing load with a command like:
+```
+perf_analyzer -m simple -u $cluster_ip:8000 --concurrency-range 1:10
+```
+
+From your Grafana dashboard, you should be able to see the number of pods increase
+as the load increases, with requests being routed evenly to the new pods.
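+
+You can also watch the autoscaler react from the command line; the HPA created by this chart is named `triton-hpa`, and with the default chart name the pods carry the `app=triton-inference-server` label:
+
+```
+# Watch replica counts and pod churn while perf_analyzer drives the load.
+kubectl get hpa triton-hpa --watch
+kubectl get pods -l app=triton-inference-server --watch
+```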
+
+## Cleanup
+
+After you have finished using the inference server, you should use Helm to
+delete the deployment.
+
+```
+$ helm list
+NAME REVISION UPDATED STATUS CHART APP VERSION NAMESPACE
+example 1 Wed Feb 27 22:16:55 2019 DEPLOYED triton-inference-server-1.0.0 1.0 default
+example-metrics 1 Tue Jan 21 12:24:07 2020 DEPLOYED prometheus-operator-6.18.0 0.32.0 default
+
+$ helm uninstall example
+$ helm uninstall example-metrics
+```
+
+For the Prometheus and Grafana services, you should [explicitly delete
+CRDs](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack#uninstall-helm-chart):
+
+```
+$ kubectl delete crd alertmanagerconfigs.monitoring.coreos.com alertmanagers.monitoring.coreos.com podmonitors.monitoring.coreos.com probes.monitoring.coreos.com prometheuses.monitoring.coreos.com prometheusrules.monitoring.coreos.com servicemonitors.monitoring.coreos.com thanosrulers.monitoring.coreos.com
+```
diff --git a/deploy/k8s-onprem/dashboard.json b/deploy/k8s-onprem/dashboard.json
new file mode 100644
index 0000000000..9c99a2751c
--- /dev/null
+++ b/deploy/k8s-onprem/dashboard.json
@@ -0,0 +1,1172 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__elements": {},
+ "__requires": [
+ {
+ "type": "panel",
+ "id": "gauge",
+ "name": "Gauge",
+ "version": ""
+ },
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "10.0.1"
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "stat",
+ "name": "Stat",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "timeseries",
+ "name": "Time series",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": {
+ "type": "datasource",
+ "uid": "grafana"
+ },
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "fiscalYearStartMonth": 0,
+ "graphTooltip": 0,
+ "id": null,
+ "links": [],
+ "liveNow": false,
+ "panels": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 8,
+ "x": 0,
+ "y": 0
+ },
+ "id": 9,
+ "options": {
+ "colorMode": "value",
+ "graphMode": "area",
+ "justifyMode": "auto",
+ "orientation": "auto",
+ "reduceOptions": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "text": {},
+ "textMode": "auto"
+ },
+ "pluginVersion": "10.0.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": true,
+ "expr": "count(count(nv_inference_count) by (instance))",
+ "interval": "",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "title": "Active Triton Instances",
+ "type": "stat"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 50,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineStyle": {
+ "fill": "solid"
+ },
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "percent"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "max": 1,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
+ },
+ "overrides": [
+ {
+ "__systemRef": "hideSeriesFrom",
+ "matcher": {
+ "id": "byNames",
+ "options": {
+ "mode": "exclude",
+ "names": [
+ "example-triton-inference-server-6784d84f5d-v9scn"
+ ],
+ "prefix": "All except:",
+ "readOnly": true
+ }
+ },
+ "properties": [
+ {
+ "id": "custom.hideFrom",
+ "value": {
+ "legend": false,
+ "tooltip": false,
+ "viz": true
+ }
+ }
+ ]
+ }
+ ]
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 16,
+ "x": 8,
+ "y": 0
+ },
+ "id": 11,
+ "interval": "15s",
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": true,
+ "expr": "sum by (pod) (rate(nv_inference_count[1m])) / ignoring(pod) group_left sum (rate(nv_inference_count[1m]))",
+ "instant": false,
+ "interval": "",
+ "legendFormat": "{{pod}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Proportion of Requests by Pod",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "description": "",
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": true,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "links": [],
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "short"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 8
+ },
+ "id": 2,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "8.2.3",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": true,
+ "expr": "sum(nv_inference_request_success) by (pod)",
+ "interval": "",
+ "legendFormat": "Success {{pod}}",
+ "refId": "A"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": true,
+ "expr": "sum(nv_inference_request_failure) by (pod)",
+ "interval": "",
+ "legendFormat": "Failure {{pod}}",
+ "refId": "B"
+ }
+ ],
+ "title": "Cumulative Inference Requests by Pod",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "Compute Time (ms)",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": true,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "links": [],
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "ms"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 17,
+ "w": 12,
+ "x": 12,
+ "y": 8
+ },
+ "id": 5,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "8.2.3",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": true,
+ "expr": "sum(rate(nv_inference_compute_infer_duration_us[30s])) by (model) / 1000",
+ "interval": "",
+ "legendFormat": "{{model}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Compute Time by Model (milliseconds)",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "Queue Time (ms)",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": true,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "links": [],
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "µs"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 17
+ },
+ "id": 4,
+ "options": {
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "8.2.3",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "exemplar": true,
+ "expr": "avg(rate(nv_inference_queue_duration_us[30s])/(1+rate(nv_inference_request_success[30s]))) by (pod)",
+ "interval": "",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "title": "Average Queue Time by Pod (microseconds)",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "links": [],
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "watt"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 18,
+ "x": 0,
+ "y": 25
+ },
+ "id": 10,
+ "options": {
+ "legend": {
+ "calcs": [
+ "mean",
+ "lastNotNull",
+ "max"
+ ],
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.0.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "nv_gpu_power_usage",
+ "interval": "",
+ "legendFormat": "GPU {{ gpu_uuid }}",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "GPU Power Usage",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "thresholds"
+ },
+ "mappings": [],
+ "max": 2400,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "#EAB839",
+ "value": 1800
+ },
+ {
+ "color": "red",
+ "value": 2200
+ }
+ ]
+ },
+ "unit": "watt"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 6,
+ "x": 18,
+ "y": 25
+ },
+ "id": 16,
+ "links": [],
+ "options": {
+ "orientation": "horizontal",
+ "reduceOptions": {
+ "calcs": [
+ "sum"
+ ],
+ "fields": "",
+ "values": false
+ },
+ "showThresholdLabels": false,
+ "showThresholdMarkers": true
+ },
+ "pluginVersion": "10.0.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "sum(nv_gpu_power_usage)",
+ "interval": "",
+ "legendFormat": "",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "GPU Power Total",
+ "type": "gauge"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "links": [],
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 33
+ },
+ "id": 18,
+ "options": {
+ "legend": {
+ "calcs": [
+ "mean",
+ "max"
+ ],
+ "displayMode": "list",
+ "placement": "right",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.0.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "exemplar": false,
+ "expr": "nv_gpu_memory_used_bytes",
+ "interval": "",
+ "legendFormat": "GPU {{gpu_uuid}}",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "GPU Framebuffer Mem Used",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "links": [],
+ "mappings": [],
+ "max": 100,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 33
+ },
+ "id": 6,
+ "options": {
+ "legend": {
+ "calcs": [
+ "mean",
+ "lastNotNull",
+ "max"
+ ],
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true,
+ "sortBy": "Max",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.0.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "nv_gpu_utilization * 100",
+ "interval": "",
+ "legendFormat": "GPU {{gpu_uuid}}",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "GPU Utilization",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "links": [],
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "bytes"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 41
+ },
+ "id": 19,
+ "options": {
+ "legend": {
+ "calcs": [
+ "mean",
+ "max"
+ ],
+ "displayMode": "list",
+ "placement": "right",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.0.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "nv_cpu_memory_used_bytes",
+ "hide": false,
+ "instant": false,
+ "legendFormat": "Memory",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "Memory Used",
+ "type": "timeseries"
+ },
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "fieldConfig": {
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 2,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "links": [],
+ "mappings": [],
+ "max": 100,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percent"
+ },
+ "overrides": []
+ },
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 41
+ },
+ "id": 20,
+ "options": {
+ "legend": {
+ "calcs": [
+ "mean",
+ "lastNotNull",
+ "max"
+ ],
+ "displayMode": "table",
+ "placement": "right",
+ "showLegend": true,
+ "sortBy": "Max",
+ "sortDesc": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "none"
+ }
+ },
+ "pluginVersion": "10.0.1",
+ "targets": [
+ {
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "nv_cpu_utilization * 100",
+ "interval": "",
+ "legendFormat": "CPU",
+ "range": true,
+ "refId": "A"
+ }
+ ],
+ "title": "CPU Utilization",
+ "type": "timeseries"
+ }
+ ],
+ "refresh": "5s",
+ "schemaVersion": 38,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ]
+ },
+ "timezone": "",
+ "title": "Triton Inference Server",
+ "uid": "slEY4dsZk",
+ "version": 5,
+ "weekStart": ""
+}
\ No newline at end of file
diff --git a/deploy/k8s-onprem/templates/_helpers.tpl b/deploy/k8s-onprem/templates/_helpers.tpl
new file mode 100644
index 0000000000..a65331e0f0
--- /dev/null
+++ b/deploy/k8s-onprem/templates/_helpers.tpl
@@ -0,0 +1,111 @@
+{{/*
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/}}
+
+# Defines a set of helper functions that produce templated values for other files,
+# mostly names and labels. This file does not produce any
+# Kubernetes resources by itself.
+
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Create inference server name.
+*/}}
+{{- define "triton-inference-server.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "triton-inference-server.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+ Create inference server metrics service name and fullname derived from above and
+ truncated appropriately.
+*/}}
+{{- define "triton-inference-server-metrics.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-metrics.fullname" -}}
+{{- $basename := include "triton-inference-server.fullname" . -}}
+{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics" -}}
+{{- end -}}
+
+{{/*
+ Create inference server metrics monitor name and fullname derived from
+ above and truncated appropriately.
+*/}}
+{{- define "triton-inference-server-metrics-monitor.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-metrics-monitor.fullname" -}}
+{{- $basename := include "triton-inference-server.fullname" . -}}
+{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
+{{- end -}}
+
+{{/*
+ Create ingressroute names derived from above and truncated appropriately
+*/}}
+{{- define "triton-inference-server-ingressroute-http.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 50 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "ingress-http" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-ingressroute-grpc.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 50 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "ingress-grpc" -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "triton-inference-server.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/deploy/k8s-onprem/templates/deployment.yaml b/deploy/k8s-onprem/templates/deployment.yaml
new file mode 100644
index 0000000000..8c3a19d136
--- /dev/null
+++ b/deploy/k8s-onprem/templates/deployment.yaml
@@ -0,0 +1,111 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Creates a deployment for the Triton Inference Server pods
+# Each pod contains a Triton container and an nfs mount as specified in
+# values.yaml for the model repository
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ template "triton-inference-server.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ replicas: {{ .Values.autoscaling.minReplicas }}
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+ template:
+ metadata:
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+
+ spec:
+ volumes:
+ - name: models
+ nfs:
+ server: {{ .Values.image.modelRepositoryServer }}
+ path: {{ .Values.image.modelRepositoryPath }}
+ readOnly: false
+ containers:
+ - name: {{ .Chart.Name }}
+ image: "{{ .Values.image.imageName }}"
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+ volumeMounts:
+ - mountPath: /models
+ name: models
+
+ resources:
+ limits:
+ nvidia.com/gpu: {{ .Values.image.numGpus }}
+
+ args:
+ - tritonserver
+ {{- range .Values.serverArgs }}
+ - {{ . }}
+ {{- end }}
+
+ ports:
+ - containerPort: 8000
+ name: http
+ - containerPort: 8001
+ name: grpc
+ - containerPort: 8002
+ name: metrics
+ livenessProbe:
+ initialDelaySeconds: 15
+ failureThreshold: 3
+ periodSeconds: 10
+ httpGet:
+ path: /v2/health/live
+ port: http
+ readinessProbe:
+ initialDelaySeconds: 5
+ periodSeconds: 5
+ failureThreshold: 3
+ httpGet:
+ path: /v2/health/ready
+ port: http
+ startupProbe:
+ # allows Triton to load the models during 30*10 = 300 sec = 5 min
+ # starts checking the other probes only after the success of this one
+ # for details, see https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-startup-probes
+ periodSeconds: 10
+ failureThreshold: 30
+ httpGet:
+ path: /v2/health/ready
+ port: http
+
+ securityContext:
+ runAsUser: 1000
+ fsGroup: 1000
diff --git a/deploy/k8s-onprem/templates/hpa.yaml b/deploy/k8s-onprem/templates/hpa.yaml
new file mode 100644
index 0000000000..4a4afa48d9
--- /dev/null
+++ b/deploy/k8s-onprem/templates/hpa.yaml
@@ -0,0 +1,52 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Creates the horizontal pod autoscaler for the Triton pod deployment.
+# In order to use custom metrics (i.e., metrics other than CPU usage) with this
+# autoscaler, you must have enabled installation of the prometheus adapter.
+# This autoscaler (and the prometheus adapter) will only be installed if the
+# autoscaling tag is set to true.
+
+{{- if .Values.tags.autoscaling }}
+apiVersion: autoscaling/v2beta2
+kind: HorizontalPodAutoscaler
+metadata:
+ name: triton-hpa
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ scaleTargetRef:
+ apiVersion: apps/v1
+ kind: Deployment
+ name: {{ template "triton-inference-server.fullname" . }}
+ minReplicas: {{ .Values.autoscaling.minReplicas }}
+ maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+ metrics: {{ toYaml .Values.autoscaling.metrics | nindent 2}}
+{{- end -}}
diff --git a/deploy/k8s-onprem/templates/ingressroute.yaml b/deploy/k8s-onprem/templates/ingressroute.yaml
new file mode 100644
index 0000000000..ee1cbee76f
--- /dev/null
+++ b/deploy/k8s-onprem/templates/ingressroute.yaml
@@ -0,0 +1,69 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Creates the traefik IngressRoutes that allow for external access to the
+# triton service. Two routes are created, one for gRPC and one for HTTP.
+# Requires deployment of the traefik IngressRoute CRD, along with various roles
+# and permissions, most easily accomplished through the referenced traefik
+# helm chart. Will only be installed if the loadBalancing tag is set to true.
+
+{{- if .Values.tags.loadBalancing }}
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+ name: {{ template "triton-inference-server-ingressroute-http.name" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ entryPoints:
+ - triton-http
+ routes:
+ - match: PathPrefix(`/`)
+ kind: Rule
+ services:
+ - name: {{ template "triton-inference-server.fullname" . }}
+ port: 8000
+---
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+ name: {{ template "triton-inference-server-ingressroute-grpc.name" . }}
+ namespace: {{ .Release.Namespace }}
+spec:
+ entryPoints:
+ - triton-grpc
+ routes:
+ - match: PathPrefix(`/`)
+ kind: Rule
+ services:
+ - name: {{ template "triton-inference-server.fullname" . }}
+ port: 8001
+ scheme: h2c
+{{- end -}}
diff --git a/deploy/k8s-onprem/templates/service.yaml b/deploy/k8s-onprem/templates/service.yaml
new file mode 100644
index 0000000000..6d5bf2cb00
--- /dev/null
+++ b/deploy/k8s-onprem/templates/service.yaml
@@ -0,0 +1,94 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Defines the services for triton and the triton metrics service.
+# Also creates a ServiceMonitor for the triton metrics service.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ clusterIP: None
+ ports:
+ - port: 8000
+ targetPort: http
+ name: http-inference-server
+ - port: 8001
+ targetPort: grpc
+ name: grpc-inference-server
+ - port: 8002
+ targetPort: metrics
+ name: metrics-inference-server
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server-metrics.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server-metrics.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ annotations:
+ alpha.monitoring.coreos.com/non-namespaced: "true"
+spec:
+ ports:
+ - name: metrics
+ port: 8080
+ targetPort: metrics
+ protocol: TCP
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ name: {{ template "triton-inference-server-metrics-monitor.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server-metrics-monitor.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server-metrics.name" . }}
+ endpoints:
+ - port: metrics
+ interval: 15s
diff --git a/deploy/k8s-onprem/values.yaml b/deploy/k8s-onprem/values.yaml
new file mode 100644
index 0000000000..6bdf2e3cde
--- /dev/null
+++ b/deploy/k8s-onprem/values.yaml
@@ -0,0 +1,83 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+tags:
+ autoscaling: true
+ loadBalancing: true
+
+image:
+ imageName: nvcr.io/nvidia/tritonserver:24.03-py3
+ pullPolicy: IfNotPresent
+ modelRepositoryServer: < Replace with the IP Address of your file server >
+ modelRepositoryPath: /srv/models
+ numGpus: 1
+
+# Add server args here, e.g. --grpc-use-ssl, --grpc-server-cert, --repository-poll-secs, etc.
+serverArgs:
+ - '--model-repository=/models'
+
+traefik:
+ ports:
+ triton-http:
+ port: 18000
+ exposedPort: 8000
+ expose: true
+ protocol: TCP
+ triton-grpc:
+ port: 18001
+ exposedPort: 8001
+ expose: true
+ protocol: TCP
+
+autoscaling:
+ minReplicas: 1
+ maxReplicas: 3
+ metrics:
+ - type: Pods
+ pods:
+ metric:
+ name: avg_time_queue_us
+ target:
+ type: AverageValue
+ averageValue: 50
+
+prometheus-adapter:
+ prometheus:
+ url: http://example-metrics-kube-prome-prometheus.default.svc.cluster.local
+ port: 9090
+ rules:
+ custom:
+ - seriesQuery: 'nv_inference_queue_duration_us{namespace="default",pod!=""}'
+ resources:
+ overrides:
+ namespace:
+ resource: "namespace"
+ pod:
+ resource: "pod"
+ name:
+ matches: "nv_inference_queue_duration_us"
+ as: "avg_time_queue_us"
+ metricsQuery: 'avg(delta(nv_inference_queue_duration_us{<<.LabelMatchers>>}[30s])/(1+delta(nv_inference_request_success{<<.LabelMatchers>>}[30s]))) by (<<.GroupBy>>)'
\ No newline at end of file
diff --git a/deploy/mlflow-triton-plugin/README.md b/deploy/mlflow-triton-plugin/README.md
new file mode 100644
index 0000000000..c011194299
--- /dev/null
+++ b/deploy/mlflow-triton-plugin/README.md
@@ -0,0 +1,255 @@
+
+# MLflow Triton
+
+MLflow plugin for deploying your models from MLflow to Triton Inference Server.
+Scripts are included for publishing models that are already in the Triton-recognized
+structure to your MLflow Model Registry.
+
+### Supported flavors
+
+The MLflow Triton plugin currently supports the following flavors; you may
+substitute the flavor specification in the examples below according to the model
+to be deployed.
+
+* onnx
+* triton
+
+## Requirements
+
+* MLflow
+* Triton Python HTTP client
+* Triton Inference Server
+
+## Installation
+
+The plugin can be installed from source using the following command:
+
+```
+python setup.py install
+```
+
+## Quick Start
+
+In this documentation, we will use the files in `examples` to showcase how
+the plugin interacts with Triton Inference Server. The `onnx_float32_int32_int32`
+model in `examples` is a simple model that takes two float32 inputs, INPUT0 and
+INPUT1, with shape [-1, 16], and produces two int32 outputs, OUTPUT0 and
+OUTPUT1, where OUTPUT0 is the element-wise summation of INPUT0 and INPUT1 and
+OUTPUT1 is the element-wise subtraction of INPUT0 and INPUT1.
+
+### Start Triton Inference Server in EXPLICIT mode
+
+The MLflow Triton plugin requires a running Triton server; see the Triton
+Inference Server
+[documentation](https://github.com/triton-inference-server/server/blob/main/docs/getting_started/quickstart.md)
+for how to start the server. Note that the server should be run in EXPLICIT
+mode (`--model-control-mode=explicit`) to use the deployment features of the
+plugin.
+
+Once the server has started, the following environment variables must be set
+so that the plugin can interact with the server properly (a minimal example
+follows the list):
+* `TRITON_URL`: The address of the Triton HTTP endpoint
+* `TRITON_MODEL_REPO`: The path to the Triton model repository. It can be an s3
+URI, but keep in mind that the env vars AWS_ACCESS_KEY_ID and
+AWS_SECRET_ACCESS_KEY are then needed.
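+
+For example, from Python (the values below are illustrative, not required
+defaults):
+
+```
+import os
+
+# Illustrative values -- point these at your own server and model repository.
+os.environ["TRITON_URL"] = "localhost:8000"
+os.environ["TRITON_MODEL_REPO"] = "/opt/triton/model_repository"
+```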
+
+### Publish models to MLflow
+
+#### ONNX flavor
+
+The MLFlow ONNX built-in functionality can be used to publish `onnx` flavor
+models to MLFlow directly, and the MLFlow Triton plugin will prepare the model
+in the format expected by Triton. You may also log
+[`config.pbtxt`](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_model_configuration.md)
+as an additional artifact which Triton will use to serve the model. Otherwise,
+the server should be run with the auto-complete feature enabled
+(`--strict-model-config=false`) to generate the model configuration.
+
+```
+import mlflow.onnx
+import onnx
+model = onnx.load("examples/onnx_float32_int32_int32/1/model.onnx")
+mlflow.onnx.log_model(model, "triton", registered_model_name="onnx_float32_int32_int32")
+```
+
+#### Triton flavor
+
+For other model frameworks that Triton supports but that are not yet recognized
+by the MLFlow Triton plugin, the `publish_model_to_mlflow.py` script can be used
+to publish `triton` flavor models to MLflow. A `triton` flavor model is a directory
+containing the model files following the
+[model layout](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_repository.md#repository-layout).
+Below is an example usage:
+
+```
+cd /scripts
+
+python publish_model_to_mlflow.py --model_name onnx_float32_int32_int32 --model_directory /onnx_float32_int32_int32 --flavor triton
+```
+
+### Deploy models tracked in MLflow to Triton
+
+Once a model is published and tracked in MLflow, it can be deployed to Triton
+via MLflow's deployments command. The following command will download the model
+to Triton's model repository and request Triton to load the model.
+
+```
+mlflow deployments create -t triton --flavor triton --name onnx_float32_int32_int32 -m models:/onnx_float32_int32_int32/1
+```
+
+### Perform inference
+
+After the model is deployed, the following command shows the CLI usage for
+sending an inference request to a deployment.
+
+```
+mlflow deployments predict -t triton --name onnx_float32_int32_int32 --input-path /input.json --output-path output.json
+```
+
+The inference result will be written to `output.json`; you may compare it
+with the results in `expected_output.json`.
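+
+The same request can be sent through the Python API. The sketch below is
+illustrative and assumes the `onnx_float32_int32_int32` example model is
+already deployed; inputs are passed as a dictionary of NumPy arrays keyed by
+input name:
+
+```
+import numpy as np
+from mlflow.deployments import get_deploy_client
+
+client = get_deploy_client('triton')
+
+# Two float32 inputs of shape [1, 16], as required by the example model.
+inputs = {
+    "INPUT0": np.arange(1, 17, dtype=np.float32).reshape(1, 16),
+    "INPUT1": np.arange(1, 17, dtype=np.float32).reshape(1, 16),
+}
+
+result = client.predict("onnx_float32_int32_int32", inputs)
+print(result["outputs"]["OUTPUT0"])  # element-wise sum of INPUT0 and INPUT1
+```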
+
+## MLflow Deployments
+
+"MLflow Deployments" is a set of MLflow APIs for deploying MLflow models to
+custom serving tools. The MLflow Triton plugin implements the following
+deployment functions to support interacting with the Triton server from MLflow.
+
+### Create Deployment
+
+The MLflow deployments create API deploys a model to the Triton target, which will
+download the model to Triton's model repository and request Triton to load the
+model.
+
+To create an MLflow deployment using the CLI:
+
+```
+mlflow deployments create -t triton --flavor triton --name model_name -m models:/model_name/1
+```
+
+To create an MLflow deployment using the Python API:
+
+```
+from mlflow.deployments import get_deploy_client
+client = get_deploy_client('triton')
+client.create_deployment("model_name", "models:/model_name/1", flavor="triton")
+```
+
+### Delete Deployment
+
+The MLflow deployments delete API removes an existing deployment from the Triton
+target, which will remove the model in Triton's model repository and request
+Triton to unload the model.
+
+To delete an MLflow deployment using the CLI:
+
+```
+mlflow deployments delete -t triton --name model_name
+```
+
+To delete an MLflow deployment using the Python API:
+
+```
+from mlflow.deployments import get_deploy_client
+client = get_deploy_client('triton')
+client.delete_deployment("model_name")
+```
+
+### Update Deployment
+
+The MLflow deployments update API updates an existing deployment with another model
+(version) tracked in MLflow, which will overwrite the model in Triton's model
+repository and request Triton to reload the model.
+
+To update an MLflow deployment using the CLI:
+
+```
+mlflow deployments update -t triton --flavor triton --name model_name -m models:/model_name/2
+```
+
+To update an MLflow deployment using the Python API:
+
+```
+from mlflow.deployments import get_deploy_client
+client = get_deploy_client('triton')
+client.update_deployment("model_name", "models:/model_name/2", flavor="triton")
+```
+
+### List Deployments
+
+The MLflow deployments list API lists all existing deployments in the Triton target.
+
+To list all MLflow deployments using the CLI:
+
+```
+mlflow deployments list -t triton
+```
+
+To list all MLflow deployments using the Python API:
+
+```
+from mlflow.deployments import get_deploy_client
+client = get_deploy_client('triton')
+client.list_deployments()
+```
+
+### Get Deployment
+
+The MLflow deployments get API returns information regarding a specific
+deployment in the Triton target.
+
+To get a specific MLflow deployment using the CLI:
+```
+mlflow deployments get -t triton --name model_name
+```
+
+To get a specific MLflow deployment using the Python API:
+```
+from mlflow.deployments import get_deploy_client
+client = get_deploy_client('triton')
+client.get_deployment("model_name")
+```
+
+### Run Inference on Deployments
+
+The MLflow deployments predict API runs inference by preparing and sending a
+request to Triton, and returns the Triton response.
+
+To run inference using the CLI:
+
+```
+mlflow deployments predict -t triton --name model_name --input-path input_file --output-path output_file
+```
+
+To run inference using the Python API:
+
+```
+from mlflow.deployments import get_deploy_client
+client = get_deploy_client('triton')
+client.predict("model_name", inputs)
+```
diff --git a/deploy/mlflow-triton-plugin/examples/expected_output.json b/deploy/mlflow-triton-plugin/examples/expected_output.json
new file mode 100644
index 0000000000..320f8f4815
--- /dev/null
+++ b/deploy/mlflow-triton-plugin/examples/expected_output.json
@@ -0,0 +1,6 @@
+{"outputs":
+ {
+ "OUTPUT0": [[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]],
+ "OUTPUT1": [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
+ }
+}
\ No newline at end of file
diff --git a/deploy/mlflow-triton-plugin/examples/input.json b/deploy/mlflow-triton-plugin/examples/input.json
new file mode 100644
index 0000000000..418396ccf0
--- /dev/null
+++ b/deploy/mlflow-triton-plugin/examples/input.json
@@ -0,0 +1,6 @@
+{"inputs":
+ {
+ "INPUT0": [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]],
+ "INPUT1": [[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]]
+ }
+}
\ No newline at end of file
diff --git a/deploy/mlflow-triton-plugin/examples/onnx_float32_int32_int32/1/model.onnx b/deploy/mlflow-triton-plugin/examples/onnx_float32_int32_int32/1/model.onnx
new file mode 100755
index 0000000000..f12d500597
Binary files /dev/null and b/deploy/mlflow-triton-plugin/examples/onnx_float32_int32_int32/1/model.onnx differ
diff --git a/deploy/mlflow-triton-plugin/examples/onnx_float32_int32_int32/config.pbtxt b/deploy/mlflow-triton-plugin/examples/onnx_float32_int32_int32/config.pbtxt
new file mode 100644
index 0000000000..75ea016cfa
--- /dev/null
+++ b/deploy/mlflow-triton-plugin/examples/onnx_float32_int32_int32/config.pbtxt
@@ -0,0 +1,57 @@
+
+# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+platform: "onnxruntime_onnx"
+max_batch_size: 8
+version_policy: { latest { num_versions: 1 }}
+input [
+ {
+ name: "INPUT0"
+ data_type: TYPE_FP32
+ dims: [ 16 ]
+ }
+]
+input [
+ {
+ name: "INPUT1"
+ data_type: TYPE_FP32
+ dims: [ 16 ]
+ }
+]
+output [
+ {
+ name: "OUTPUT0"
+ data_type: TYPE_INT32
+ dims: [ 16 ]
+ }
+]
+output [
+ {
+ name: "OUTPUT1"
+ data_type: TYPE_INT32
+ dims: [ 16 ]
+ }
+]
\ No newline at end of file
diff --git a/deploy/mlflow-triton-plugin/mlflow_triton/__init__.py b/deploy/mlflow-triton-plugin/mlflow_triton/__init__.py
new file mode 100755
index 0000000000..0b73b537d4
--- /dev/null
+++ b/deploy/mlflow-triton-plugin/mlflow_triton/__init__.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/deploy/mlflow-triton-plugin/mlflow_triton/config.py b/deploy/mlflow-triton-plugin/mlflow_triton/config.py
new file mode 100755
index 0000000000..0a381fd407
--- /dev/null
+++ b/deploy/mlflow-triton-plugin/mlflow_triton/config.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import os
+import re
+from collections import namedtuple
+
+from mlflow.exceptions import MlflowException
+
+
+class Config(dict):
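+    """Plugin settings read from environment variables.
+
+    ``TRITON_URL`` and ``TRITON_MODEL_REPO`` are read from the environment.
+    When the model repository is an s3:// URI, a boto3 client plus the bucket
+    and prefix are derived from it and stored alongside the other settings.
+    """
+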
+ def __init__(self):
+ super().__init__()
+ self["triton_url"] = os.environ.get("TRITON_URL")
+ self["triton_model_repo"] = os.environ.get("TRITON_MODEL_REPO")
+
+ if self["triton_model_repo"].startswith("s3://"):
+ self.s3_regex = re.compile(
+ "s3://(http://|https://|)([0-9a-zA-Z\\-.]+):([0-9]+)/"
+ "([0-9a-z.\\-]+)(((/[0-9a-zA-Z.\\-_]+)*)?)"
+ )
+
+ uri = self.parse_path(self["triton_model_repo"])
+ if uri.protocol == "https://":
+ protocol = "https://"
+ else:
+ protocol = "http://"
+ endpoint_url = None
+ if uri.host_name != "" and uri.host_port != "":
+ endpoint_url = "{}{}:{}".format(protocol, uri.host_name, uri.host_port)
+
+ import boto3
+
+ # boto3 handles AWS credentials
+ self["s3"] = boto3.client("s3", endpoint_url=endpoint_url)
+ self["s3_bucket"] = uri.bucket
+ self["s3_prefix"] = uri.prefix
+ self["triton_model_repo"] = "s3://{}".format(
+ os.path.join(uri.bucket, uri.prefix)
+ )
+
+ def parse_path(self, path):
+ # Cleanup extra slashes
+ clean_path = self.clean_path(path)
+
+ # Get the bucket name and the object path. Return error if path is malformed
+ match = self.s3_regex.fullmatch(clean_path)
+ S3URI = namedtuple(
+ "S3URI", ["protocol", "host_name", "host_port", "bucket", "prefix"]
+ )
+ if match:
+ uri = S3URI(*match.group(1, 2, 3, 4, 5))
+ if uri.prefix and uri.prefix[0] == "/":
+ uri = uri._replace(prefix=uri.prefix[1:])
+ else:
+ bucket_start = clean_path.find("s3://") + len("s3://")
+ bucket_end = clean_path.find("/", bucket_start)
+
+ # If there isn't a slash, the address has only the bucket
+ if bucket_end > bucket_start:
+ bucket = clean_path[bucket_start:bucket_end]
+ prefix = clean_path[bucket_end + 1 :]
+ else:
+ bucket = clean_path[bucket_start:]
+ prefix = ""
+ uri = S3URI("", "", "", bucket, prefix)
+
+ if uri.bucket == "":
+ raise MlflowException("No bucket name found in path: " + path)
+
+ return uri
+
+ def clean_path(self, s3_path):
+ # Must handle paths with s3 prefix
+ start = s3_path.find("s3://")
+ path = ""
+ if start != -1:
+ path = s3_path[start + len("s3://") :]
+ clean_path = "s3://"
+ else:
+ path = s3_path
+ clean_path = ""
+
+ # Must handle paths with https:// or http:// prefix
+ https_start = path.find("https://")
+ if https_start != -1:
+ path = path[https_start + len("https://") :]
+ clean_path += "https://"
+ else:
+ http_start = path.find("http://")
+ if http_start != -1:
+ path = path[http_start + len("http://") :]
+ clean_path += "http://"
+
+ # Remove trailing slashes
+ rtrim_length = len(path.rstrip("/"))
+ if rtrim_length == 0:
+ raise MlflowException("Invalid bucket name: '" + path + "'")
+
+ # Remove leading slashes
+ ltrim_length = len(path) - len(path.lstrip("/"))
+ if ltrim_length == len(path):
+ raise MlflowException("Invalid bucket name: '" + path + "'")
+
+ # Remove extra internal slashes
+ true_path = path[ltrim_length : rtrim_length + 1]
+ previous_slash = False
+ for i in range(len(true_path)):
+ if true_path[i] == "/":
+ if not previous_slash:
+ clean_path += true_path[i]
+ previous_slash = True
+ else:
+ clean_path += true_path[i]
+ previous_slash = False
+
+ return clean_path
diff --git a/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py b/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py
new file mode 100755
index 0000000000..bebe559b9e
--- /dev/null
+++ b/deploy/mlflow-triton-plugin/mlflow_triton/deployments.py
@@ -0,0 +1,540 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import ast
+import glob
+import json
+import logging
+import os
+import shutil
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import tritonclient.http as tritonhttpclient
+from mlflow.deployments import BaseDeploymentClient
+from mlflow.exceptions import MlflowException
+from mlflow.models import Model
+from mlflow.tracking.artifact_utils import _download_artifact_from_uri
+from mlflow_triton.config import Config
+from tritonclient.utils import (
+ InferenceServerException,
+ np_to_triton_dtype,
+ triton_to_np_dtype,
+)
+
+logger = logging.getLogger(__name__)
+
+_MLFLOW_META_FILENAME = "mlflow-meta.json"
+
+
+class TritonPlugin(BaseDeploymentClient):
+ def __init__(self, uri):
+ """
+ Initializes the deployment plugin, sets the triton model repo
+ """
+ super(TritonPlugin, self).__init__(target_uri=uri)
+ self.server_config = Config()
+ triton_url, self.triton_model_repo = self._get_triton_server_config()
+ # need to add other flavors
+ self.supported_flavors = ["triton", "onnx"]
+ # URL cleaning for constructing Triton client
+ ssl = False
+ if triton_url.startswith("http://"):
+ triton_url = triton_url[len("http://") :]
+ elif triton_url.startswith("https://"):
+ triton_url = triton_url[len("https://") :]
+ ssl = True
+ self.triton_client = tritonhttpclient.InferenceServerClient(
+ url=triton_url, ssl=ssl
+ )
+
+ def _get_triton_server_config(self):
+ triton_url = "localhost:8000"
+ if self.server_config["triton_url"]:
+ triton_url = self.server_config["triton_url"]
+ logger.info("Triton url = {}".format(triton_url))
+
+ if not self.server_config["triton_model_repo"]:
+ raise Exception("Check that environment variable TRITON_MODEL_REPO is set")
+ triton_model_repo = self.server_config["triton_model_repo"]
+ logger.info("Triton model repo = {}".format(triton_model_repo))
+
+ return triton_url, triton_model_repo
+
+ def create_deployment(self, name, model_uri, flavor=None, config=None):
+ """
+ Deploy the model at the model_uri to the Triton model repo. Associated config.pbtxt and *labels* files will be deployed.
+
+        :param name: Name of the model
+ :param model_uri: Model uri in format model://
+ :param flavor: Flavor of the deployed model
+ :param config: Configuration parameters
+
+ :return: Model flavor and name
+ """
+ self._validate_flavor(flavor)
+
+ # Verify model does not already exist in Triton
+ if self._model_exists(name):
+ raise Exception(
+ "Unable to create deployment for name %s because it already exists."
+ % (name)
+ )
+
+ # Get the path of the artifact
+ path = Path(_download_artifact_from_uri(model_uri))
+ self._copy_files_to_triton_repo(path, name, flavor)
+ self._generate_mlflow_meta_file(name, flavor, model_uri)
+
+ try:
+ self.triton_client.load_model(name)
+ except InferenceServerException as ex:
+ raise MlflowException(str(ex))
+
+ return {"name": name, "flavor": flavor}
+
+ def delete_deployment(self, name):
+ """
+ Delete the deployed model in Triton with the provided model name
+
+        :param name: Name of the model with version number. For ex: "densenet_onnx/2"
+
+ :return: None
+ """
+ # Verify model is already deployed to Triton
+ if not self._model_exists(name):
+ raise Exception(
+ "Unable to delete deployment for name %s because it does not exist."
+ % (name)
+ )
+
+ try:
+ self.triton_client.unload_model(name)
+ except InferenceServerException as ex:
+ raise MlflowException(str(ex))
+
+ self._delete_deployment_files(name)
+
+ return None
+
+ def update_deployment(self, name, model_uri=None, flavor=None, config=None):
+ """
+ Update the model deployment in triton with the provided name
+
+        :param name: Name and version number of the model, in the form model_name/version.
+ :param model_uri: Model uri models:/model_name/version
+ :param flavor: The flavor of the model
+ :param config: Configuration parameters
+
+ :return: Returns the flavor of the model
+ """
+ # TODO: Update this function with a warning. If config and label files associated with this
+ # updated model are different than the ones already deployed to triton, issue a warning to the user.
+ self._validate_flavor(flavor)
+
+ # Verify model is already deployed to Triton
+ if not self._model_exists(name):
+ raise Exception(
+ "Unable to update deployment for name %s because it does not exist."
+ % (name)
+ )
+
+ self.get_deployment(name)
+
+ # Get the path of the artifact
+ path = Path(_download_artifact_from_uri(model_uri))
+
+ self._copy_files_to_triton_repo(path, name, flavor)
+
+ self._generate_mlflow_meta_file(name, flavor, model_uri)
+
+ try:
+ self.triton_client.load_model(name)
+ except InferenceServerException as ex:
+ raise MlflowException(str(ex))
+
+ return {"flavor": flavor}
+
+ def list_deployments(self):
+ """
+ List models deployed to Triton.
+
+        :return: A list of dicts describing the models currently deployed to Triton
+ """
+ resp = self.triton_client.get_model_repository_index()
+ actives = []
+ for d in resp:
+ if "state" in d and d["state"] == "READY":
+ mlflow_meta_path = os.path.join(
+ self.triton_model_repo, d["name"], _MLFLOW_META_FILENAME
+ )
+ if "s3" in self.server_config:
+ meta_dict = ast.literal_eval(
+ self.server_config["s3"]
+ .get_object(
+ Bucket=self.server_config["s3_bucket"],
+ Key=os.path.join(
+ self.server_config["s3_prefix"],
+ d["name"],
+ _MLFLOW_META_FILENAME,
+ ),
+ )["Body"]
+ .read()
+ .decode("utf-8")
+ )
+ elif os.path.isfile(mlflow_meta_path):
+ meta_dict = self._get_mlflow_meta_dict(d["name"])
+ else:
+ continue
+
+ d["triton_model_path"] = meta_dict["triton_model_path"]
+ d["mlflow_model_uri"] = meta_dict["mlflow_model_uri"]
+ d["flavor"] = meta_dict["flavor"]
+ actives.append(d)
+
+ return actives
+
+ def get_deployment(self, name):
+ """
+ Get deployment from Triton.
+
+        :param name: Name of the model.
+            Ex: "mini_bert_onnx" - gets the details of the active version of this model
+
+ :return: output - Returns a dict with model info
+ """
+ deployments = self.list_deployments()
+ for d in deployments:
+ if d["name"] == name:
+ return d
+ raise ValueError(f"Unable to get deployment with name {name}")
+
+ def predict(self, deployment_name, df):
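+        """
+        Run inference on the named Triton deployment.
+
+        :param deployment_name: Name of the deployed model in Triton.
+        :param df: Either a pandas DataFrame with a single column whose rows
+                   are indexed by input name, or a dict mapping input names to
+                   numpy arrays. Unnamed numpy array input is not supported.
+
+        :return: pandas DataFrame with an "outputs" column keyed by output name
+        """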
+ single_input_np = None
+ if isinstance(df, np.ndarray):
+ single_input_np = df
+
+ inputs = []
+ if single_input_np is not None:
+ raise MlflowException("Unnamed input is not currently supported")
+ else:
+ if isinstance(df, pd.DataFrame):
+ model_metadata = self.triton_client.get_model_metadata(deployment_name)
+ input_dtype = {}
+ for input in model_metadata["inputs"]:
+ input_dtype[input["name"]] = triton_to_np_dtype(input["datatype"])
+ # Sanity check
+ if len(df.columns) != 1:
+ raise MlflowException("Expect Pandas DataFrame has only 1 column")
+ col = df.columns[0]
+ for row in df.index:
+ val = df[col][row]
+ # Need to form numpy array of the data type expected
+ if type(df[col][row]) != np.ndarray:
+ val = np.array(val, dtype=input_dtype[row])
+ inputs.append(
+ tritonhttpclient.InferInput(
+ row, val.shape, np_to_triton_dtype(val.dtype)
+ )
+ )
+ inputs[-1].set_data_from_numpy(val)
+ else:
+ for key, val in df.items():
+ inputs.append(
+ tritonhttpclient.InferInput(
+ key, val.shape, np_to_triton_dtype(val.dtype)
+ )
+ )
+ inputs[-1].set_data_from_numpy(val)
+
+ try:
+ resp = self.triton_client.infer(model_name=deployment_name, inputs=inputs)
+ res = {}
+ for output in resp.get_response()["outputs"]:
+ res[output["name"]] = resp.as_numpy(output["name"])
+ return pd.DataFrame.from_dict({"outputs": res})
+ except InferenceServerException as ex:
+ raise MlflowException(str(ex))
+
+ def _generate_mlflow_meta_file(self, name, flavor, model_uri):
+ triton_deployment_dir = os.path.join(self.triton_model_repo, name)
+ meta_dict = {
+ "name": name,
+ "triton_model_path": triton_deployment_dir,
+ "mlflow_model_uri": model_uri,
+ "flavor": flavor,
+ }
+
+ if "s3" in self.server_config:
+ self.server_config["s3"].put_object(
+ Body=json.dumps(meta_dict, indent=4).encode("utf-8"),
+ Bucket=self.server_config["s3_bucket"],
+ Key=os.path.join(
+ self.server_config["s3_prefix"], name, _MLFLOW_META_FILENAME
+ ),
+ )
+ else:
+ with open(
+ os.path.join(triton_deployment_dir, _MLFLOW_META_FILENAME), "w"
+ ) as outfile:
+ json.dump(meta_dict, outfile, indent=4)
+
+ print("Saved", _MLFLOW_META_FILENAME, "to", triton_deployment_dir)
+
+ def _get_mlflow_meta_dict(self, name):
+ mlflow_meta_path = os.path.join(
+ self.triton_model_repo, name, _MLFLOW_META_FILENAME
+ )
+
+ if "s3" in self.server_config:
+ mlflow_meta_dict = ast.literal_eval(
+ self.server_config["s3"]
+ .get_object(
+ Bucket=self.server_config["s3_bucket"],
+ Key=os.path.join(
+ self.server_config["s3_prefix"], name, _MLFLOW_META_FILENAME
+ ),
+ )["Body"]
+ .read()
+ .decode("utf-8")
+ )
+ else:
+ with open(mlflow_meta_path, "r") as metafile:
+ mlflow_meta_dict = json.load(metafile)
+
+ return mlflow_meta_dict
+
+ def _get_copy_paths(self, artifact_path, name, flavor):
+ copy_paths = {}
+ copy_paths["model_path"] = {}
+ triton_deployment_dir = os.path.join(self.triton_model_repo, name)
+ if flavor == "triton":
+ # When flavor is 'triton', the model is assumed to be preconfigured
+ # with proper model versions and version strategy, which may differ from
+ # the versioning in MLFlow
+ for file in artifact_path.iterdir():
+ if file.is_dir():
+ copy_paths["model_path"]["from"] = file
+ break
+ copy_paths["model_path"]["to"] = triton_deployment_dir
+ elif flavor == "onnx":
+ # Look for model file via MLModel metadata or iterating dir
+ model_file = None
+ config_file = None
+ for file in artifact_path.iterdir():
+ if file.name == "MLmodel":
+ mlmodel = Model.load(file)
+ onnx_meta_data = mlmodel.flavors.get("onnx", None)
+ if onnx_meta_data is not None:
+ model_file = onnx_meta_data.get("data", None)
+ elif file.name == "config.pbtxt":
+ config_file = file.name
+ copy_paths["config_path"] = {}
+ elif file.suffix == ".txt" and file.stem != "requirements":
+ copy_paths[file.stem] = {"from": file, "to": triton_deployment_dir}
+ if model_file is None:
+ for file in artifact_path.iterdir():
+ if file.suffix == ".onnx":
+ model_file = file.name
+ break
+ copy_paths["model_path"]["from"] = os.path.join(artifact_path, model_file)
+ copy_paths["model_path"]["to"] = os.path.join(triton_deployment_dir, "1")
+
+ if config_file is not None:
+ copy_paths["config_path"]["from"] = os.path.join(
+ artifact_path, config_file
+ )
+ copy_paths["config_path"]["to"] = triton_deployment_dir
+ else:
+ # Make sure the directory has been created for config.pbtxt
+ os.makedirs(triton_deployment_dir, exist_ok=True)
+ # Provide a minimum config file so Triton knows what backend
+ # should be performing the auto-completion
+ config = """
+backend: "onnxruntime"
+default_model_filename: "{}"
+""".format(
+ model_file
+ )
+ with open(
+ os.path.join(triton_deployment_dir, "config.pbtxt"), "w"
+ ) as cfile:
+ cfile.write(config)
+ return copy_paths
+
+ def _walk(self, path):
+ """Walk a path like os.walk() if path is dir,
+ return file in the expected format otherwise.
+ :param path: dir or file path
+
+ :return: root, dirs, files
+ """
+ if os.path.isfile(path):
+ return [(os.path.dirname(path), [], [os.path.basename(path)])]
+ elif os.path.isdir(path):
+ return list(os.walk(path))
+ else:
+ raise Exception(f"path: {path} is not a valid path to a file or dir.")
+
+ def _copy_files_to_triton_repo(self, artifact_path, name, flavor):
+ copy_paths = self._get_copy_paths(artifact_path, name, flavor)
+ for key in copy_paths:
+ if "s3" in self.server_config:
+ # copy model dir to s3 recursively
+ for root, dirs, files in self._walk(copy_paths[key]["from"]):
+ for filename in files:
+ local_path = os.path.join(root, filename)
+
+ if flavor == "onnx":
+ s3_path = os.path.join(
+ self.server_config["s3_prefix"],
+ copy_paths[key]["to"]
+ .replace(self.server_config["triton_model_repo"], "")
+ .strip("/"),
+ filename,
+ )
+
+ elif flavor == "triton":
+ rel_path = os.path.relpath(
+ local_path,
+ copy_paths[key]["from"],
+ )
+ s3_path = os.path.join(
+ self.server_config["s3_prefix"], name, rel_path
+ )
+
+ self.server_config["s3"].upload_file(
+ local_path,
+ self.server_config["s3_bucket"],
+ s3_path,
+ )
+ else:
+ if os.path.isdir(copy_paths[key]["from"]):
+ if os.path.isdir(copy_paths[key]["to"]):
+ shutil.rmtree(copy_paths[key]["to"])
+ shutil.copytree(copy_paths[key]["from"], copy_paths[key]["to"])
+ else:
+ if not os.path.isdir(copy_paths[key]["to"]):
+ os.makedirs(copy_paths[key]["to"])
+ shutil.copy(copy_paths[key]["from"], copy_paths[key]["to"])
+
+ if "s3" not in self.server_config:
+ triton_deployment_dir = os.path.join(self.triton_model_repo, name)
+ version_folder = os.path.join(triton_deployment_dir, "1")
+ os.makedirs(version_folder, exist_ok=True)
+
+ return copy_paths
+
+ def _delete_mlflow_meta(self, filepath):
+ if "s3" in self.server_config:
+ self.server_config["s3"].delete_object(
+ Bucket=self.server_config["s3_bucket"],
+ Key=filepath,
+ )
+ elif os.path.isfile(filepath):
+ os.remove(filepath)
+
+ def _delete_deployment_files(self, name):
+ triton_deployment_dir = os.path.join(self.triton_model_repo, name)
+
+ if "s3" in self.server_config:
+ objs = self.server_config["s3"].list_objects(
+ Bucket=self.server_config["s3_bucket"],
+ Prefix=os.path.join(self.server_config["s3_prefix"], name),
+ )
+
+ for key in objs["Contents"]:
+ key = key["Key"]
+ try:
+ self.server_config["s3"].delete_object(
+ Bucket=self.server_config["s3_bucket"],
+ Key=key,
+ )
+ except Exception as e:
+ raise Exception(f"Could not delete {key}: {e}")
+
+ else:
+ # Check if the deployment directory exists
+ if not os.path.isdir(triton_deployment_dir):
+ raise Exception(
+ "A deployment does not exist for this model in directory {} for model name {}".format(
+ triton_deployment_dir, name
+ )
+ )
+
+ model_file = glob.glob("{}/model*".format(triton_deployment_dir))
+ for file in model_file:
+ print("Model directory found: {}".format(file))
+ os.remove(file)
+ print("Model directory removed: {}".format(file))
+
+ # Delete mlflow meta file
+ mlflow_meta_path = os.path.join(
+ self.triton_model_repo, name, _MLFLOW_META_FILENAME
+ )
+ self._delete_mlflow_meta(mlflow_meta_path)
+
+ def _validate_config_args(self, config):
+ if not config["version"]:
+ raise Exception("Please provide the version as a config argument")
+ if not config["version"].isdigit():
+ raise ValueError(
+ "Please make sure version is a number. version = {}".format(
+ config["version"]
+ )
+ )
+
+ def _validate_flavor(self, flavor):
+ if flavor not in self.supported_flavors:
+ raise Exception("{} model flavor not supported by Triton".format(flavor))
+
+ def _model_exists(self, name):
+ deploys = self.list_deployments()
+ exists = False
+ for d in deploys:
+ if d["name"] == name:
+ exists = True
+ return exists
+
+
+def run_local(name, model_uri, flavor=None, config=None):
+ raise NotImplementedError("run_local has not been implemented yet")
+
+
+def target_help():
+ help_msg = (
+ "\nmlflow-triton plugin integrates the Triton Inference Server to the mlflow deployment pipeline. \n\n "
+ "Example command: \n\n"
+ ' mlflow deployments create -t triton --name mymodel --flavor onnx -m models:/mymodel/Production -C "version=1" \n\n'
+ "The environment variable TRITON_MODEL_REPO must be set to the location that the Triton"
+ "Inference Server is storing its models\n\n"
+ "export TRITON_MODEL_REPO = /path/to/triton/model/repo\n\n"
+ "Use the following config options:\n\n"
+ "- version: The version of the model to be released. This config will be used by Triton to create a new model sub-directory.\n"
+ )
+ return help_msg
diff --git a/deploy/mlflow-triton-plugin/scripts/publish_model_to_mlflow.py b/deploy/mlflow-triton-plugin/scripts/publish_model_to_mlflow.py
new file mode 100755
index 0000000000..779d393020
--- /dev/null
+++ b/deploy/mlflow-triton-plugin/scripts/publish_model_to_mlflow.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import os
+
+import click
+import mlflow
+import triton_flavor
+
+
+@click.command()
+@click.option(
+ "--model_name",
+ help="Model name",
+)
+@click.option(
+ "--model_directory",
+ type=click.Path(exists=True, readable=True),
+ required=True,
+ help="Model filepath",
+)
+@click.option(
+ "--flavor",
+ type=click.Choice(["triton"], case_sensitive=True),
+ required=True,
+ help="Model flavor",
+)
+def publish_to_mlflow(model_name, model_directory, flavor):
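+    # MLFLOW_TRACKING_URI must be set in the environment before running this
+    # script; it points at the MLflow tracking server to publish to.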
+ mlflow_tracking_uri = os.environ["MLFLOW_TRACKING_URI"]
+ artifact_path = "triton"
+
+ mlflow.set_tracking_uri(uri=mlflow_tracking_uri)
+
+ with mlflow.start_run() as run:
+ if flavor == "triton":
+ triton_flavor.log_model(
+ model_directory,
+ artifact_path=artifact_path,
+ registered_model_name=model_name,
+ )
+ else:
+ # Enhancement, for model in other flavor (framework) that Triton
+ # supports, try to format it in Triton style and provide
+ # config.pbtxt file. Should this be done in the plugin?
+ raise Exception("Other flavor is not supported")
+
+ print(mlflow.get_artifact_uri())
+
+
+if __name__ == "__main__":
+ publish_to_mlflow()
diff --git a/deploy/mlflow-triton-plugin/scripts/triton_flavor.py b/deploy/mlflow-triton-plugin/scripts/triton_flavor.py
new file mode 100755
index 0000000000..7b0f61630d
--- /dev/null
+++ b/deploy/mlflow-triton-plugin/scripts/triton_flavor.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+The ``triton`` module provides APIs for logging and loading Triton-recognized
+models in the MLflow Model format. This module exports MLflow Models with the following
+flavors:
+
+Triton format
+ model files in the structure that Triton can load the model from.
+
+"""
+import os
+import shutil
+import sys
+
+from mlflow.exceptions import MlflowException
+from mlflow.models import Model
+from mlflow.models.model import MLMODEL_FILE_NAME
+from mlflow.protos.databricks_pb2 import RESOURCE_ALREADY_EXISTS
+from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS
+from mlflow.utils.annotations import experimental
+
+FLAVOR_NAME = "triton"
+
+
+@experimental
+def save_model(
+ triton_model_path,
+ path,
+ mlflow_model=None,
+):
+ """
+    Save a Triton model to a path on the local file system.
+
+ :param triton_model_path: File path to Triton model to be saved.
+ :param path: Local path where the model is to be saved.
+ :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
+
+ """
+
+ path = os.path.abspath(path)
+ if os.path.exists(path):
+ raise MlflowException(
+ message="Path '{}' already exists".format(path),
+ error_code=RESOURCE_ALREADY_EXISTS,
+ )
+ os.makedirs(path)
+ triton_model_path = os.path.normpath(triton_model_path)
+ model_data_subpath = os.path.basename(triton_model_path)
+ model_data_path = os.path.join(path, model_data_subpath)
+
+ # Save Triton model
+ shutil.copytree(triton_model_path, model_data_path)
+
+ mlflow_model.add_flavor(FLAVOR_NAME, data=model_data_subpath)
+ mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
+
+
+@experimental
+def log_model(
+ triton_model_path,
+ artifact_path,
+ registered_model_name=None,
+ await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
+):
+ """
+    Log a Triton model as an MLflow artifact for the current run.
+
+ :param triton_model_path: File path to Triton model.
+ :param artifact_path: Run-relative artifact path.
+ :param registered_model_name: (Experimental) If given, create a model version under
+ ``registered_model_name``, also creating a registered model if one
+ with the given name does not exist.
+
+ :param await_registration_for: Number of seconds to wait for the model version to finish
+ being created and is in ``READY`` status. By default, the function
+ waits for five minutes. Specify 0 or None to skip waiting.
+
+ """
+ Model.log(
+ artifact_path=artifact_path,
+ flavor=sys.modules[__name__],
+ triton_model_path=triton_model_path,
+ registered_model_name=registered_model_name,
+ await_registration_for=await_registration_for,
+ )
diff --git a/deploy/mlflow-triton-plugin/setup.py b/deploy/mlflow-triton-plugin/setup.py
new file mode 100755
index 0000000000..65b8e0df1e
--- /dev/null
+++ b/deploy/mlflow-triton-plugin/setup.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from setuptools import find_packages, setup
+
+setup(
+ name="mlflow-triton",
+ version="0.2.0",
+ description="Triton Mlflow Deployment",
+ long_description=open("README.md").read(),
+ long_description_content_type="text/markdown",
+ packages=find_packages(),
+ install_requires=["mlflow>=2.2.1,<3.0", "tritonclient[all]", "boto3"],
+ entry_points={"mlflow.deployments": "triton=mlflow_triton.deployments"},
+)
diff --git a/deploy/oci/Chart.yaml b/deploy/oci/Chart.yaml
new file mode 100644
index 0000000000..2b7541bee6
--- /dev/null
+++ b/deploy/oci/Chart.yaml
@@ -0,0 +1,31 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+appVersion: "1.0"
+description: Triton Inference Server
+name: triton-inference-server
+version: 1.0.0
diff --git a/deploy/oci/README.md b/deploy/oci/README.md
new file mode 100644
index 0000000000..dc293c7378
--- /dev/null
+++ b/deploy/oci/README.md
@@ -0,0 +1,306 @@
+
+
+[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
+
+# Kubernetes Deploy: Triton Inference Server Cluster
+
+A helm chart for installing a single cluster of Triton Inference
+Server is provided. By default the cluster contains a single instance
+of the inference server but the *replicaCount* configuration parameter
+can be set to create a cluster of any size, as described below.
+
+This guide assumes you already have a functional Kubernetes cluster
+and helm installed (see below for instructions on installing
+helm). Note the following requirements:
+
+* The helm chart deploys Prometheus and Grafana to collect and display Triton metrics. To use this helm chart you must install Prometheus and Grafana in your cluster as described below and your cluster must contain sufficient CPU resources to support these services.
+
+* If you want Triton Server to use GPUs for inferencing, your cluster
+must be configured to contain the desired number of GPU nodes (A10 GPU instances recommended)
+with support for the NVIDIA driver and CUDA version required by the version
+of the inference server you are using.
+
+The steps below describe how to set-up a model repository, use helm to
+launch the inference server, and then send inference requests to the
+running server. You can access a Grafana endpoint to see real-time
+metrics reported by the inference server.
+
+## Notes for OKE cluster
+
+When creating your node pool, the default value for the boot volume is 46.6GB.
+Due to the size of the server container, it is recommended to increase this value
+to 150GB and set a [cloud-init script to increase the partition](https://blogs.oracle.com/ateam/post/oke-node-sizing-for-very-large-container-images):
+
+```
+#!/bin/bash
+curl --fail -H "Authorization: Bearer Oracle" -L0 http://169.254.169.254/opc/v2/instance/metadata/oke_init_script | base64 --decode >/var/run/oke-init.sh
+bash /var/run/oke-init.sh
+sudo /usr/libexec/oci-growfs -y
+```
+
+
+## Installing Helm
+
+### Using Cloud Shell from OCI Web Console
+
+It is possible to access your OKE Cluster [directly from the OCI Web Console](https://docs.oracle.com/en-us/iaas/Content/ContEng/Tasks/contengaccessingclusterkubectl.htm).
+Helm v3 is already available from the Cloud Shell.
+
+### Helm v3
+
+If you do not already have Helm installed in your Kubernetes cluster,
+executing the following steps from the [official helm install
+guide](https://helm.sh/docs/intro/install/) will
+give you a quick setup.
+
+If you're currently using Helm v2 and would like to migrate to Helm v3,
+please see the [official migration guide](https://helm.sh/docs/topics/v2_v3_migration/).
+
+### Helm v2
+
+> **NOTE**: Moving forward this chart will only be tested and maintained for Helm v3.
+
+Below are example instructions for installing Helm v2.
+
+```
+$ curl https://raw.githubusercontent.com/helm/helm/master/scripts/get | bash
+$ kubectl create serviceaccount -n kube-system tiller
+serviceaccount/tiller created
+$ kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
+$ helm init --service-account tiller --wait
+```
+
+If you run into any issues, you can refer to the official installation guide [here](https://v2.helm.sh/docs/install/).
+
+## Model Repository
+
+If you already have a model repository you may use that with this helm
+chart. If you do not have a model repository, you can checkout a local
+copy of the inference server source repository to create an example
+model repository:
+
+```
+$ git clone https://github.com/triton-inference-server/server.git
+```
+
+Triton Server needs a repository of models that it will make available
+for inferencing. For this example, you will place the model repository
+in an S3-compatible OCI Object Storage bucket.
+
+```
+$ oci os bucket create --compartment-id --name triton-inference-server-repository
+```
+
+Following the [QuickStart](../../docs/getting_started/quickstart.md), download the
+example model repository to your system and copy it into the OCI
+Bucket.
+
+```
+$ oci os object bulk-upload -bn triton-inference-server-repository --src-dir docs/examples/model_repository/
+```
+
+### OCI Model Repository
+
+To load the model from the OCI Object Storage bucket, you need to convert the following OCI credentials to base64 format and add them to the values.yaml:
+
+```
+echo -n 'REGION' | base64
+```
+```
+echo -n 'SECRET_KEY_ID' | base64
+```
+```
+echo -n 'SECRET_ACCESS_KEY' | base64
+```
+
+You also need to adapt _modelRepositoryPath_ in values.yaml to your [namespace](https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/understandingnamespaces.htm) and [OCI region](https://docs.oracle.com/en-us/iaas/Content/General/Concepts/regions.htm).
+
+```
+s3://https://.compat.objectstorage..oraclecloud.com:443/triton-inference-server-repository
+```
+
+## Deploy Prometheus and Grafana
+
+The inference server metrics are collected by Prometheus and viewable
+by Grafana. The inference server helm chart assumes that Prometheus
+and Grafana are available, so this step must be followed even if you
+don't want to use Grafana.
+
+Use the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) to install these components. The
+*serviceMonitorSelectorNilUsesHelmValues* flag is needed so that
+Prometheus can find the inference server metrics in the *example*
+release deployed below.
+
+```
+$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false prometheus-community/kube-prometheus-stack
+```
+
+Then port-forward to the Grafana service so you can access it from
+your local browser.
+
+```
+$ kubectl port-forward service/example-metrics-grafana 8080:80
+```
+
+Now you should be able to navigate in your browser to localhost:8080
+and see the Grafana login page. Use username=admin and
+password=prom-operator to log in.
+
+Note that it is also possible to set up a LoadBalancer service for the Grafana
+dashboard by running:
+
+```
+$ helm install example-metrics --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false --set grafana.service.type=LoadBalancer prometheus-community/kube-prometheus-stack
+```
+
+You can then see the public IP of your Grafana dashboard by running:
+
+```
+$ kubectl get svc
+NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+alertmanager-operated ClusterIP None 9093/TCP,9094/TCP,9094/UDP 2m33s
+example-metrics-grafana LoadBalancer 10.96.82.33 141.145.220.114 80:31005/TCP 2m38s
+```
+
+The default load balancer comes with a fixed shape and a bandwidth of 100Mbps. You can switch to a [flexible](https://docs.oracle.com/en-us/iaas/Content/ContEng/Tasks/contengcreatingloadbalancers-subtopic.htm#contengcreatingloadbalancers_subtopic) shape and adapt the bandwidth according to your OCI limits if the bandwidth becomes a bottleneck.
+
+
+An example Grafana dashboard is available in dashboard.json. Use the
+import function in Grafana to import and view this dashboard.
+
+## Deploy the Inference Server
+
+Deploy the inference server using the default configuration with the
+following commands.
+
+```
+$ cd
+$ helm install example .
+```
+
+Use kubectl to see status and wait until the inference server pods are
+running.
+
+```
+$ kubectl get pods
+NAME READY STATUS RESTARTS AGE
+example-triton-inference-server-5f74b55885-n6lt7 1/1 Running 0 2m21s
+```
+
+There are several ways of overriding the default configuration as
+described in this [helm
+documentation](https://helm.sh/docs/using_helm/#customizing-the-chart-before-installing).
+
+You can edit the values.yaml file directly or you can use the *--set*
+option to override a single parameter with the CLI. For example, to
+deploy a cluster of four inference servers, use *--set* to set the
+replicaCount parameter.
+
+```
+$ helm install example --set replicaCount=4 .
+```
+
+You can also write your own "config.yaml" file with the values you
+want to override and pass it to helm.
+
+```
+$ cat << EOF > config.yaml
+namespace: MyCustomNamespace
+image:
+ imageName: nvcr.io/nvidia/tritonserver:custom-tag
+ modelRepositoryPath: s3://https://.compat.objectstorage..oraclecloud.com:443/triton-inference-server-repository
+EOF
+$ helm install example -f config.yaml .
+```
+
+## Using Triton Inference Server
+
+Now that the inference server is running, you can send HTTP or GRPC
+requests to it to perform inferencing. By default, the inferencing
+service is exposed with a LoadBalancer service type. Use the following
+to find the external IP for the inference server. In this case it is
+34.83.9.133.
+
+```
+$ kubectl get services
+NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+...
+example-triton-inference-server LoadBalancer 10.18.13.28 34.83.9.133 8000:30249/TCP,8001:30068/TCP,8002:32723/TCP 47m
+```
+
+The inference server exposes an HTTP endpoint on port 8000, a GRPC
+endpoint on port 8001, and a Prometheus metrics endpoint on
+port 8002. You can use curl to get the metadata of the inference server
+from the HTTP endpoint.
+
+```
+$ curl 34.83.9.133:8000/v2
+```
+
+Follow the [QuickStart](../../docs/getting_started/quickstart.md) to get the example
+image classification client that can be used to perform inferencing
+using image classification models being served by the inference
+server. For example,
+
+```
+$ image_client -u 34.83.9.133:8000 -m inception_graphdef -s INCEPTION -c3 mug.jpg
+Request 0, batch size 1
+Image 'images/mug.jpg':
+ 504 (COFFEE MUG) = 0.723992
+ 968 (CUP) = 0.270953
+ 967 (ESPRESSO) = 0.00115997
+```
+
+## Cleanup
+
+Once you've finished using the inference server, you should use helm to
+delete the deployment.
+
+```
+$ helm list
+NAME REVISION UPDATED STATUS CHART APP VERSION NAMESPACE
+example 1 Wed Feb 27 22:16:55 2019 DEPLOYED triton-inference-server-1.0.0 1.0 default
+example-metrics 1 Tue Jan 21 12:24:07 2020 DEPLOYED prometheus-operator-6.18.0 0.32.0 default
+
+$ helm uninstall example
+$ helm uninstall example-metrics
+```
+
+For the Prometheus and Grafana services, you should [explicitly delete
+CRDs](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack#uninstall-helm-chart):
+
+```
+$ kubectl delete crd alertmanagerconfigs.monitoring.coreos.com alertmanagers.monitoring.coreos.com podmonitors.monitoring.coreos.com probes.monitoring.coreos.com prometheuses.monitoring.coreos.com prometheusrules.monitoring.coreos.com servicemonitors.monitoring.coreos.com thanosrulers.monitoring.coreos.com
+```
+
+You may also want to delete the OCI bucket you created to hold the
+model repository.
+
+```
+$ oci os bucket delete --bucket-name triton-inference-server-repository --empty
+```
diff --git a/deploy/oci/dashboard.json b/deploy/oci/dashboard.json
new file mode 100644
index 0000000000..8960b41d35
--- /dev/null
+++ b/deploy/oci/dashboard.json
@@ -0,0 +1,411 @@
+{
+ "__inputs": [
+ {
+ "name": "DS_PROMETHEUS",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "6.3.5"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "heatmap",
+ "name": "Heatmap",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 0,
+ "id": null,
+ "links": [],
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "nv_inference_request_success",
+ "legendFormat": "Success {{instance}}",
+ "refId": "A"
+ },
+ {
+ "expr": "nv_inference_request_failure",
+ "legendFormat": "Failure {{instance}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Cumulative Inference Requests",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateReds",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "gridPos": {
+ "h": 9,
+ "w": 12,
+ "x": 12,
+ "y": 0
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 7,
+ "legend": {
+ "show": false
+ },
+ "options": {},
+ "reverseYBuckets": false,
+ "targets": [
+ {
+ "expr": "sum(increase(nv_inference_load_ratio_bucket[1m])) by (le)",
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Load Ratio (Total Time / Compute Time)",
+ "tooltip": {
+ "show": true,
+ "showHistogram": false
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "auto",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 9
+ },
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(nv_inference_queue_duration_us[30s]) / 1000",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Queue Time (milliseconds)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Queue Time (ms)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 9
+ },
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "dataLinks": []
+ },
+ "percentage": false,
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "rate(nv_inference_compute_duration_us[30s]) / 1000",
+ "legendFormat": "{{instance}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Compute Time (milliseconds)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "Compute Time (ms)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "5s",
+ "schemaVersion": 19,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ]
+ },
+ "timezone": "",
+ "title": "Triton Inference Server",
+ "uid": "slEY4dsZk",
+ "version": 8
+}
diff --git a/deploy/oci/templates/_helpers.tpl b/deploy/oci/templates/_helpers.tpl
new file mode 100644
index 0000000000..6dba910012
--- /dev/null
+++ b/deploy/oci/templates/_helpers.tpl
@@ -0,0 +1,92 @@
+{{/*
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/}}
+
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Create inference server name.
+*/}}
+{{- define "triton-inference-server.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "triton-inference-server.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+ Create inference server metrics service name and fullname derived from above and
+ truncated appropriately.
+*/}}
+{{- define "triton-inference-server-metrics.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-metrics.fullname" -}}
+{{- $basename := include "triton-inference-server.fullname" . -}}
+{{- $basename_trimmed := $basename | trunc 55 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics" -}}
+{{- end -}}
+
+{{/*
+ Create inference server metrics monitor name and fullname derived from
+ above and truncated appropriately.
+*/}}
+{{- define "triton-inference-server-metrics-monitor.name" -}}
+{{- $basename := include "triton-inference-server.name" . -}}
+{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
+{{- end -}}
+
+{{- define "triton-inference-server-metrics-monitor.fullname" -}}
+{{- $basename := include "triton-inference-server.fullname" . -}}
+{{- $basename_trimmed := $basename | trunc 47 | trimSuffix "-" -}}
+{{- printf "%s-%s" $basename_trimmed "metrics-monitor" -}}
+{{- end -}}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "triton-inference-server.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
diff --git a/deploy/oci/templates/deployment.yaml b/deploy/oci/templates/deployment.yaml
new file mode 100644
index 0000000000..f374bd181f
--- /dev/null
+++ b/deploy/oci/templates/deployment.yaml
@@ -0,0 +1,100 @@
+# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ template "triton-inference-server.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ replicas: {{ .Values.replicaCount }}
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+ template:
+ metadata:
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+
+ spec:
+ containers:
+ - name: {{ .Chart.Name }}
+ image: "{{ .Values.image.imageName }}"
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
+
+ resources:
+ limits:
+ nvidia.com/gpu: {{ .Values.image.numGpus }}
+
+ args: ["tritonserver", "--model-store={{ .Values.image.modelRepositoryPath }}",
+ "--model-control-mode=poll",
+ "--repository-poll-secs=5"]
+
+ env:
+ - name: AWS_DEFAULT_REGION
+ valueFrom:
+ secretKeyRef:
+ name: oci-credentials
+ key: OCI_DEFAULT_REGION
+ - name: AWS_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: oci-credentials
+ key: OCI_ACCESS_KEY_ID
+ - name: AWS_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: oci-credentials
+ key: OCI_SECRET_ACCESS_KEY
+
+ ports:
+ - containerPort: 8000
+ name: http
+ - containerPort: 8001
+ name: grpc
+ - containerPort: 8002
+ name: metrics
+ livenessProbe:
+ httpGet:
+ path: /v2/health/live
+ port: http
+ readinessProbe:
+ initialDelaySeconds: 5
+ periodSeconds: 5
+ httpGet:
+ path: /v2/health/ready
+ port: http
+
+ securityContext:
+ runAsUser: 1000
+ fsGroup: 1000
diff --git a/deploy/oci/templates/secrets.yaml b/deploy/oci/templates/secrets.yaml
new file mode 100644
index 0000000000..0546fdda9d
--- /dev/null
+++ b/deploy/oci/templates/secrets.yaml
@@ -0,0 +1,35 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+kind: Secret
+metadata:
+ name: oci-credentials
+type: Opaque
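+# NOTE: as with any Kubernetes Secret, the values under "data" must be
+# base64-encoded strings (for example, produced with `echo -n <value> | base64`).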
+data:
+ OCI_DEFAULT_REGION: {{ .Values.secret.region }}
+ OCI_ACCESS_KEY_ID: {{ .Values.secret.id }}
+ OCI_SECRET_ACCESS_KEY: {{ .Values.secret.key }}
diff --git a/deploy/oci/templates/service.yaml b/deploy/oci/templates/service.yaml
new file mode 100644
index 0000000000..3315fd77db
--- /dev/null
+++ b/deploy/oci/templates/service.yaml
@@ -0,0 +1,91 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ type: {{ .Values.service.type }}
+ ports:
+ - port: 8000
+ targetPort: http
+ name: http-inference-server
+ - port: 8001
+ targetPort: grpc
+ name: grpc-inference-server
+ - port: 8002
+ targetPort: metrics
+ name: metrics-inference-server
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ template "triton-inference-server-metrics.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server-metrics.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ annotations:
+ alpha.monitoring.coreos.com/non-namespaced: "true"
+spec:
+ ports:
+ - name: metrics
+ port: 8080
+ targetPort: metrics
+ protocol: TCP
+ selector:
+ app: {{ template "triton-inference-server.name" . }}
+ release: {{ .Release.Name }}
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ name: {{ template "triton-inference-server-metrics-monitor.fullname" . }}
+ namespace: {{ .Release.Namespace }}
+ labels:
+ app: {{ template "triton-inference-server-metrics-monitor.name" . }}
+ chart: {{ template "triton-inference-server.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ selector:
+ matchLabels:
+ app: {{ template "triton-inference-server-metrics.name" . }}
+ endpoints:
+ - port: metrics
+ interval: 15s
diff --git a/deploy/oci/values.yaml b/deploy/oci/values.yaml
new file mode 100644
index 0000000000..00d66d2594
--- /dev/null
+++ b/deploy/oci/values.yaml
@@ -0,0 +1,41 @@
+# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+replicaCount: 1
+
+image:
+ imageName: nvcr.io/nvidia/tritonserver:24.03-py3
+ pullPolicy: IfNotPresent
+  modelRepositoryPath: s3://https://<OCI_NAMESPACE>.compat.objectstorage.<OCI_REGION>.oraclecloud.com:443/triton-inference-server-repository
+ numGpus: 1
+
+service:
+ type: LoadBalancer
+
+secret:
+ region: OCI_REGION
+ id: OCI_SECRET_KEY_ID
+ key: OCI_SECRET_ACCESS_KEY
\ No newline at end of file
diff --git a/docker/README.third-party-src b/docker/README.third-party-src
new file mode 100644
index 0000000000..85f17d11ee
--- /dev/null
+++ b/docker/README.third-party-src
@@ -0,0 +1,5 @@
+This directory contains the licenses and source code for software
+included in the Triton Inference Server build. To extract the files
+use:
+
+ $ tar zxf src.tar.gz
diff --git a/docker/cpu_only/entrypoint.d/12-banner.sh b/docker/cpu_only/entrypoint.d/12-banner.sh
new file mode 100755
index 0000000000..0b4adda84b
--- /dev/null
+++ b/docker/cpu_only/entrypoint.d/12-banner.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+prodname_uc=$(echo "${NVIDIA_PRODUCT_NAME}" | tr [:lower:] [:upper:] | sed 's/ /_/g' | sed 's/^NVIDIA_//') # Product name
+_prodver="NVIDIA_${prodname_uc}_VERSION" # Container product version variable name
+_compver="${prodname_uc}_VERSION" # Upstream component version variable name
+
+echo
+echo "NVIDIA Release ${!_prodver} (build ${NVIDIA_BUILD_ID})"
+[ -n "${!_compver}" ] && echo "${NVIDIA_PRODUCT_NAME} Version ${!_compver}"
diff --git a/docker/cpu_only/entrypoint.d/50-gpu-driver-check2.sh b/docker/cpu_only/entrypoint.d/50-gpu-driver-check2.sh
new file mode 100755
index 0000000000..4caa8eeff7
--- /dev/null
+++ b/docker/cpu_only/entrypoint.d/50-gpu-driver-check2.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+export TRITON_SERVER_CPU_ONLY=1
diff --git a/docker/cpu_only/nvidia_entrypoint.sh b/docker/cpu_only/nvidia_entrypoint.sh
new file mode 100755
index 0000000000..82859d1bb6
--- /dev/null
+++ b/docker/cpu_only/nvidia_entrypoint.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# Copyright 2016-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Gather parts in alpha order
+shopt -s nullglob extglob
+SCRIPT_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
+declare -a PARTS=( "${SCRIPT_DIR}/entrypoint.d"/*@(.txt|.sh) )
+shopt -u nullglob extglob
+
+# Execute the entrypoint parts
+for file in "${PARTS[@]}"; do
+ case "${file}" in
+ *.txt) cat "${file}";;
+ *.sh) source "${file}";;
+ esac
+done
+
+echo
+
+# This script can either be a wrapper around arbitrary command lines,
+# or it will simply exec bash if no arguments were given
+if [[ $# -eq 0 ]]; then
+ exec "/bin/bash"
+else
+ exec "$@"
+fi
diff --git a/docker/entrypoint.d/10-banner.txt b/docker/entrypoint.d/10-banner.txt
new file mode 100644
index 0000000000..56a8b28e55
--- /dev/null
+++ b/docker/entrypoint.d/10-banner.txt
@@ -0,0 +1,4 @@
+
+=============================
+== Triton Inference Server ==
+=============================
diff --git a/docker/entrypoint.d/15-container-copyright.txt b/docker/entrypoint.d/15-container-copyright.txt
new file mode 100644
index 0000000000..5e077f288f
--- /dev/null
+++ b/docker/entrypoint.d/15-container-copyright.txt
@@ -0,0 +1,2 @@
+
+Copyright (c) 2018-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
diff --git a/docker/entrypoint.d/50-gpu-driver-check2.sh b/docker/entrypoint.d/50-gpu-driver-check2.sh
new file mode 100755
index 0000000000..bc22dd55ad
--- /dev/null
+++ b/docker/entrypoint.d/50-gpu-driver-check2.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+if [[ "${NVIDIA_CPU_ONLY:-0}" == "1" ]]; then
+ export TRITON_SERVER_CPU_ONLY=1
+fi
diff --git a/docker/entrypoint.d/56-network-driver-version-check.sh b/docker/entrypoint.d/56-network-driver-version-check.sh
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/docker/entrypoint.d/56-network-driver-version-check.sh
@@ -0,0 +1 @@
+
diff --git a/docker/entrypoint.d/70-shm-check.sh b/docker/entrypoint.d/70-shm-check.sh
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/docker/entrypoint.d/70-shm-check.sh
@@ -0,0 +1 @@
+
diff --git a/docker/entrypoint.d/99-check-run-aip-mode.sh b/docker/entrypoint.d/99-check-run-aip-mode.sh
new file mode 100755
index 0000000000..ec9249e944
--- /dev/null
+++ b/docker/entrypoint.d/99-check-run-aip-mode.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+# If detect Vertex AI environment, launch tritonserver with supplied arguments
+
+# This has the effect of "unshifting" the tritonserver command onto the front
+# of $@ if AIP_MODE is nonempty; it will then be exec'd by entrypoint.sh
+set -- ${AIP_MODE:+"/opt/tritonserver/bin/tritonserver"} "$@"
diff --git a/docker/sagemaker/serve b/docker/sagemaker/serve
new file mode 100755
index 0000000000..e9abc00bf5
--- /dev/null
+++ b/docker/sagemaker/serve
@@ -0,0 +1,169 @@
+#!/bin/bash
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+SAGEMAKER_SINGLE_MODEL_REPO=/opt/ml/model/
+
+# Use 'ready' for ping check in single-model endpoint mode, and use 'live' for ping check in multi-model endpoint mode
+# https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/rest_predict_v2.yaml#L10-L26
+if [ -n "$SAGEMAKER_TRITON_OVERRIDE_PING_MODE" ]; then
+ SAGEMAKER_TRITON_PING_MODE=${SAGEMAKER_TRITON_OVERRIDE_PING_MODE}
+else
+ SAGEMAKER_TRITON_PING_MODE="ready"
+fi
+
+# Note: in Triton on SageMaker, each model URL is registered as a separate repository,
+# e.g., /opt/ml/models//model. Specifying the MME model repo path as /opt/ml/models causes Triton
+# to treat it as an additional empty repository and changes
+# the state of all models to UNAVAILABLE in the model repository
+# https://github.com/triton-inference-server/core/blob/main/src/model_repository_manager.cc#L914,L922
+# On Triton, this path will be a dummy path as it's mandatory to specify a model repo when starting triton
+SAGEMAKER_MULTI_MODEL_REPO=/tmp/sagemaker
+
+SAGEMAKER_MODEL_REPO=${SAGEMAKER_SINGLE_MODEL_REPO}
+is_mme_mode=false
+
+if [ -n "$SAGEMAKER_MULTI_MODEL" ]; then
+ if [ "$SAGEMAKER_MULTI_MODEL" == "true" ]; then
+ mkdir -p ${SAGEMAKER_MULTI_MODEL_REPO}
+ SAGEMAKER_MODEL_REPO=${SAGEMAKER_MULTI_MODEL_REPO}
+ if [ -n "$SAGEMAKER_TRITON_OVERRIDE_PING_MODE" ]; then
+ SAGEMAKER_TRITON_PING_MODE=${SAGEMAKER_TRITON_OVERRIDE_PING_MODE}
+ else
+ SAGEMAKER_TRITON_PING_MODE="live"
+ fi
+ is_mme_mode=true
+ echo -e "Triton is running in SageMaker MME mode. Using Triton ping mode: \"${SAGEMAKER_TRITON_PING_MODE}\""
+ fi
+fi
+
+SAGEMAKER_ARGS="--model-repository=${SAGEMAKER_MODEL_REPO}"
+# Set model namespacing to true, but allow disabling if required
+if [ -n "$SAGEMAKER_TRITON_DISABLE_MODEL_NAMESPACING" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --model-namespacing=${SAGEMAKER_TRITON_DISABLE_MODEL_NAMESPACING}"
+else
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --model-namespacing=true"
+fi
+if [ -n "$SAGEMAKER_BIND_TO_PORT" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --sagemaker-port=${SAGEMAKER_BIND_TO_PORT}"
+fi
+if [ -n "$SAGEMAKER_SAFE_PORT_RANGE" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --sagemaker-safe-port-range=${SAGEMAKER_SAFE_PORT_RANGE}"
+fi
+if [ -n "$SAGEMAKER_TRITON_ALLOW_GRPC" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --allow-grpc=${SAGEMAKER_TRITON_ALLOW_GRPC}"
+else
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --allow-grpc=false"
+fi
+if [ -n "$SAGEMAKER_TRITON_ALLOW_METRICS" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --allow-metrics=${SAGEMAKER_TRITON_ALLOW_METRICS}"
+else
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --allow-metrics=false"
+fi
+if [ -n "$SAGEMAKER_TRITON_METRICS_PORT" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --metrics-port=${SAGEMAKER_TRITON_METRICS_PORT}"
+fi
+if [ -n "$SAGEMAKER_TRITON_GRPC_PORT" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --grpc-port=${SAGEMAKER_TRITON_GRPC_PORT}"
+fi
+if [ -n "$SAGEMAKER_TRITON_BUFFER_MANAGER_THREAD_COUNT" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --buffer-manager-thread-count=${SAGEMAKER_TRITON_BUFFER_MANAGER_THREAD_COUNT}"
+fi
+if [ -n "$SAGEMAKER_TRITON_THREAD_COUNT" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --sagemaker-thread-count=${SAGEMAKER_TRITON_THREAD_COUNT}"
+fi
+# Enable verbose logging by default. If env variable is specified, use value from env variable
+if [ -n "$SAGEMAKER_TRITON_LOG_VERBOSE" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-verbose=${SAGEMAKER_TRITON_LOG_VERBOSE}"
+else
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-verbose=true"
+fi
+if [ -n "$SAGEMAKER_TRITON_LOG_INFO" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-info=${SAGEMAKER_TRITON_LOG_INFO}"
+fi
+if [ -n "$SAGEMAKER_TRITON_LOG_WARNING" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-warning=${SAGEMAKER_TRITON_LOG_WARNING}"
+fi
+if [ -n "$SAGEMAKER_TRITON_LOG_ERROR" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --log-error=${SAGEMAKER_TRITON_LOG_ERROR}"
+fi
+if [ -n "$SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-default-byte-size=${SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE}"
+else
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-default-byte-size=16777216" #16MB
+fi
+if [ -n "$SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-growth-byte-size=${SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE}"
+else
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=python,shm-growth-byte-size=1048576" #1MB
+fi
+if [ -n "$SAGEMAKER_TRITON_TENSORFLOW_VERSION" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --backend-config=tensorflow,version=${SAGEMAKER_TRITON_TENSORFLOW_VERSION}"
+fi
+if [ -n "$SAGEMAKER_TRITON_MODEL_LOAD_GPU_LIMIT" ]; then
+ num_gpus=$(nvidia-smi -L | wc -l)
+ for ((i=0; i<${num_gpus}; i++)); do
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --model-load-gpu-limit ${i}:${SAGEMAKER_TRITON_MODEL_LOAD_GPU_LIMIT}"
+ done
+fi
+if [ -n "$SAGEMAKER_TRITON_ADDITIONAL_ARGS" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} ${SAGEMAKER_TRITON_ADDITIONAL_ARGS}"
+fi
+
+
+if [ "${is_mme_mode}" = false ] && [ -f "${SAGEMAKER_MODEL_REPO}/config.pbtxt" ]; then
+ echo "ERROR: Incorrect directory structure."
+ echo " Model directory needs to contain the top level folder"
+ exit 1
+fi
+
+if [ "${is_mme_mode}" = false ] && [ -n "$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
+ if [ -d "${SAGEMAKER_MODEL_REPO}/$SAGEMAKER_TRITON_DEFAULT_MODEL_NAME" ]; then
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --load-model=${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME}"
+ else
+ echo "ERROR: Directory with provided SAGEMAKER_TRITON_DEFAULT_MODEL_NAME ${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME} does not exist"
+ exit 1
+ fi
+elif [ "${is_mme_mode}" = false ]; then
+ MODEL_DIRS=(`find "${SAGEMAKER_MODEL_REPO}" -mindepth 1 -maxdepth 1 -type d -printf "%f\n"`)
+ case ${#MODEL_DIRS[@]} in
+ 0) echo "ERROR: No model found in model repository";
+ exit 1
+ ;;
+ 1) echo "WARNING: No SAGEMAKER_TRITON_DEFAULT_MODEL_NAME provided."
+ echo " Starting with the only existing model directory ${MODEL_DIRS[0]}";
+ export SAGEMAKER_TRITON_DEFAULT_MODEL_NAME=${MODEL_DIRS[0]}
+ ;;
+ *) echo "ERROR: More than 1 model directory found in model repository."
+ echo " Either provide a single directory or set SAGEMAKER_TRITON_DEFAULT_MODEL_NAME to run the ensemble backend."
+ echo " Directories found in model repository: ${MODEL_DIRS[@]}";
+ exit 1
+ ;;
+ esac
+ SAGEMAKER_ARGS="${SAGEMAKER_ARGS} --load-model=${SAGEMAKER_TRITON_DEFAULT_MODEL_NAME}"
+fi
+
+tritonserver --allow-sagemaker=true --allow-http=false --model-control-mode=explicit $SAGEMAKER_ARGS
diff --git a/docs/Dockerfile.docs b/docs/Dockerfile.docs
new file mode 100644
index 0000000000..ba30a144ac
--- /dev/null
+++ b/docs/Dockerfile.docs
@@ -0,0 +1,78 @@
+# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+FROM ubuntu:22.04
+
+# various documentation dependencies
+RUN apt-get update -q=2 \
+ && apt-get install -y --no-install-recommends \
+ build-essential \
+ curl \
+ doxygen \
+ git \
+ git-lfs \
+ pandoc \
+ python3-dev \
+ python3-pip \
+ ssh \
+ unzip \
+ wget \
+ && rm -rf /var/lib/apt/lists/*
+
+# install protobuf
+RUN wget https://github.com/google/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip -O /tmp/proto.zip \
+ && unzip /tmp/proto.zip -d /usr/local \
+ && rm /tmp/proto.zip
+
+# install pseudomuto/protoc-gen-doc
+RUN wget https://github.com/pseudomuto/protoc-gen-doc/releases/download/v1.3.2/protoc-gen-doc-1.3.2.linux-amd64.go1.12.6.tar.gz -O /tmp/protoc-gen-doc.tar.gz \
+ && tar -xvf /tmp/protoc-gen-doc.tar.gz --strip-components=1 -C /usr/local/bin/ \
+ && rm /tmp/protoc-gen-doc.tar.gz
+
+# install sphinx et al
+RUN pip3 install \
+ ablog \
+ attrs \
+ breathe \
+ docutils \
+ exhale \
+ ipython \
+ myst-nb \
+ nbclient \
+ nbsphinx \
+ rst-to-myst \
+ sphinx==5.0.0 \
+ sphinx-book-theme \
+ sphinx-copybutton \
+ sphinx-design \
+ sphinx-prompt \
+ sphinx-sitemap \
+ sphinx-tabs \
+ sphinxcontrib-bibtex
+
+# Set visitor script to be included on every HTML page
+ENV VISITS_COUNTING_SCRIPT="//assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js"
+
diff --git a/docs/Makefile b/docs/Makefile
index fb11718781..b8cf4b654b 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -1,4 +1,4 @@
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -24,34 +24,35 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# Makefile for Sphinx documentation
+# Minimal makefile for Sphinx documentation
#
-# You can set these variables from the command line.
-SPHINXOPTS =
-SPHINXBUILD = sphinx-build
-SPHINXPROJ = TRTIS
-SOURCEDIR = .
-BUILDDIR = build
-EXHALEDIRS = cpp_api doxyoutput
-PROTOBUFFILES = $(wildcard ../src/core/*.proto)
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = .
+BUILDDIR = build
+TRITONCLIENTRSTDIR = _reference/tritonclient
+
+#PROTOBUFFILES = $(wildcard ../triton/proto/*.proto)
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
clean:
- @rm -fr $(BUILDDIR) $(EXHALEDIRS)
- @rm -f protobuf_api/*.proto.rst
+ @rm -fr ${BUILDDIR}
+ @rm -fr ${TRITONCLIENTRSTDIR}
+
+.PHONY: help Makefile clean
-protobufdoc: protobuf_api/gen_proto_doc.sh
- cd protobuf_api && \
- rm -f *.proto.rst && \
- bash -x ./gen_proto_doc.sh $(PROTOBUFFILES:%=../%)
+# protobuf: source/reference/protos/gen_proto_doc.sh
+# cd source/reference/protos && \
+# rm -f *.proto.rst && \
+# bash -x ./gen_proto_doc.sh $(PROTOBUFFILES:%=../%)
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile protobufdoc
+%:
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help clean protobufdoc Makefile
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000000..22e0c0d691
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,218 @@
+
+
+# **Triton Inference Server Documentation**
+
+| [Installation](README.md#installation) | [Getting Started](README.md#getting-started) | [User Guide](README.md#user-guide) | [API Guide](protocol/README.md) | [Additional Resources](README.md#resources) | [Customization Guide](README.md#customization-guide) |
+| ------------ | --------------- | --------------- | ------------ | --------------- | --------------- |
+
+**New to Triton Inference Server?** Make use of
+[these tutorials](https://github.com/triton-inference-server/tutorials)
+ to begin your Triton journey!
+
+## **Installation**
+Before you can use the Triton Docker image you must install
+[Docker](https://docs.docker.com/engine/install). If you plan on using
+a GPU for inference you must also install the [NVIDIA Container
+Toolkit](https://github.com/NVIDIA/nvidia-docker). DGX users should
+follow [Preparing to use NVIDIA
+Containers](http://docs.nvidia.com/deeplearning/dgx/preparing-containers/index.html).
+
+Pull the image using the following command.
+
+```
+$ docker pull nvcr.io/nvidia/tritonserver:<xx.yy>-py3
+```
+
+Where \<xx.yy\> is the version of Triton that you want to pull. For a complete list of all the variants and versions of the Triton Inference Server Container, visit the [NGC Page](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver). More information about customizing the Triton Container can be found in [this section](customization_guide/compose.md) of the User Guide.
+
+## **Getting Started**
+
+This guide covers the simplest possible workflow for deploying a model using a Triton Inference Server.
+- [Create a Model Repository](getting_started/quickstart.md#create-a-model-repository)
+- [Launch Triton](getting_started/quickstart.md#launch-triton)
+- [Send an Inference Request](getting_started/quickstart.md#send-an-inference-request)
+
+Triton Inference Server has a considerable list of versatile and powerful features. All new users are encouraged to explore the [User Guide](README.md#user-guide) and the [additional resources](README.md#resources) sections for the features most relevant to their use case.
+
+## **User Guide**
+The User Guide describes how to configure Triton, organize and configure your models, use the C++ and Python clients, etc. This guide includes the following:
+* Creating a Model Repository [[Overview](README.md#model-repository) || [Details](user_guide/model_repository.md)]
+* Writing a Model Configuration [[Overview](README.md#model-configuration) || [Details](user_guide/model_configuration.md)]
+* Building a Model Pipeline [[Overview](README.md#model-pipeline)]
+* Managing Model Availability [[Overview](README.md#model-management) || [Details](user_guide/model_management.md)]
+* Collecting Server Metrics [[Overview](README.md#metrics) || [Details](user_guide/metrics.md)]
+* Supporting Custom Ops/layers [[Overview](README.md#framework-custom-operations) || [Details](user_guide/custom_operations.md)]
+* Using the Client API [[Overview](README.md#client-libraries-and-examples) || [Details](https://github.com/triton-inference-server/client)]
+* Cancelling Inference Requests [[Overview](README.md#cancelling-inference-requests) || [Details](user_guide/request_cancellation.md)]
+* Analyzing Performance [[Overview](README.md#performance-analysis)]
+* Deploying on edge (Jetson) [[Overview](README.md#jetson-and-jetpack)]
+* Debugging Guide [Details](./user_guide/debugging_guide.md)
+
+### Model Repository
+[Model Repositories](user_guide/model_repository.md) are the organizational hub for using Triton. All models, configuration files, and additional resources needed to serve the models are housed inside a model repository.
+- [Cloud Storage](user_guide/model_repository.md#model-repository-locations)
+- [File Organization](user_guide/model_repository.md#model-files)
+- [Model Versioning](user_guide/model_repository.md#model-versions)
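+
+As an illustrative sketch (the model and file names below are hypothetical), a minimal repository holding a single ONNX model could be laid out as:
+
+```
+$ find model_repository -type f
+model_repository/my_model/config.pbtxt
+model_repository/my_model/1/model.onnx
+```
+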
+### Model Configuration
+
+A [Model Configuration](user_guide/model_configuration.md) file is where you set the model-level options, such as output tensor reshaping and dynamic batch sizing.
+
+#### Required Model Configuration
+
+Triton Inference Server requires some [Minimum Required parameters](user_guide/model_configuration.md#minimal-model-configuration) to be filled in the Model Configuration. These required parameters essentially pertain to the structure of the model. For TensorFlow, ONNX and TensorRT models, users can rely on Triton to [Auto Generate](user_guide/model_configuration.md#auto-generated-model-configuration) the Minimum Required model configuration.
+- [Maximum Batch Size - Batching and Non-Batching Models](user_guide/model_configuration.md#maximum-batch-size)
+- [Input and Output Tensors](user_guide/model_configuration.md#inputs-and-outputs)
+ - [Tensor Datatypes](user_guide/model_configuration.md#datatypes)
+ - [Tensor Reshape](user_guide/model_configuration.md#reshape)
+ - [Shape Tensor](user_guide/model_configuration.md#shape-tensors)
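+
+As an illustrative sketch of these required parameters (the model name, tensor names, datatypes, and shapes below are hypothetical), a minimal config.pbtxt might look like:
+
+```
+$ cat model_repository/my_model/config.pbtxt
+name: "my_model"
+backend: "onnxruntime"
+max_batch_size: 8
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ 4 ]
+  }
+]
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP32
+    dims: [ 4 ]
+  }
+]
+```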
+
+#### Versioning Models
+Users need the ability to save and serve different versions of models based on business requirements. Triton allows users to set policies to make available different versions of the model as needed. [Learn More](user_guide/model_configuration.md#version-policy).
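+
+As an illustrative sketch, a version policy that serves only the two most recent versions of a model can be set in its config.pbtxt:
+
+```
+version_policy: { latest: { num_versions: 2 } }
+```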
+
+#### Instance Groups
+Triton allows users to run multiple instances of the same model. Users can specify how many instances (copies) of a model to load and whether to use the GPU or CPU. If the model is being loaded on a GPU, users can also select which GPUs to use. [Learn more](user_guide/model_configuration.md#instance-groups).
+- [Specifying Multiple Model Instances](user_guide/model_configuration.md#multiple-model-instances)
+- [CPU and GPU Instances](user_guide/model_configuration.md#cpu-model-instance)
+- [Configuring Rate Limiter](user_guide/model_configuration.md#rate-limiter-configuration)
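+
+As an illustrative sketch, the following config.pbtxt snippet (the instance counts and GPU index are hypothetical) places two instances of a model on GPU 0 and two on the CPU:
+
+```
+instance_group [
+  {
+    count: 2
+    kind: KIND_GPU
+    gpus: [ 0 ]
+  },
+  {
+    count: 2
+    kind: KIND_CPU
+  }
+]
+```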
+
+#### Optimization Settings
+
+The Model Configuration ModelOptimizationPolicy property is used to specify optimization and prioritization settings for a model. These settings control if/how a model is optimized by the backend and how it is scheduled and executed by Triton. See the [ModelConfig Protobuf](https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto) and [Optimization Documentation](user_guide/optimization.md#optimization) for the currently available settings.
+- [Framework-Specific Optimization](user_guide/optimization.md#framework-specific-optimization)
+ - [ONNX-TensorRT](user_guide/optimization.md#onnx-with-tensorrt-optimization-ort-trt)
+ - [ONNX-OpenVINO](user_guide/optimization.md#onnx-with-openvino-optimization)
+ - [TensorFlow-TensorRT](user_guide/optimization.md#tensorflow-with-tensorrt-optimization-tf-trt)
+ - [TensorFlow-Mixed-Precision](user_guide/optimization.md#tensorflow-automatic-fp16-optimization)
+- [NUMA Optimization](user_guide/optimization.md#numa-optimization)
+
+#### Scheduling and Batching
+
+Triton supports batching individual inference requests to improve compute resource utilization. This is extremely important because individual requests typically will not saturate GPU resources, and thus will not leverage the parallelism provided by GPUs to its full extent. Learn more about Triton's [Batcher and Scheduler](user_guide/model_configuration.md#scheduling-and-batching); a minimal dynamic batching configuration sketch follows the list below.
+- [Default Scheduler - Non-Batching](user_guide/model_configuration.md#default-scheduler)
+- [Dynamic Batcher](user_guide/model_configuration.md#dynamic-batcher)
+ - [How to Configure Dynamic Batcher](user_guide/model_configuration.md#recommended-configuration-process)
+ - [Delayed Batching](user_guide/model_configuration.md#delayed-batching)
+ - [Preferred Batch Size](user_guide/model_configuration.md#preferred-batch-sizes)
+ - [Preserving Request Ordering](user_guide/model_configuration.md#preserve-ordering)
+ - [Priority Levels](user_guide/model_configuration.md#priority-levels)
+ - [Queuing Policies](user_guide/model_configuration.md#queue-policy)
+ - [Ragged Batching](user_guide/ragged_batching.md)
+- [Sequence Batcher](user_guide/model_configuration.md#sequence-batcher)
+ - [Stateful Models](user_guide/architecture.md#stateful-models)
+ - [Control Inputs](user_guide/architecture.md#control-inputs)
+ - [Implicit State - Stateful Inference Using a Stateless Model](user_guide/architecture.md#implicit-state-management)
+ - [Sequence Scheduling Strategies](user_guide/architecture.md#scheduling-strategies)
+ - [Direct](user_guide/architecture.md#direct)
+ - [Oldest](user_guide/architecture.md#oldest)
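+
+As an illustrative sketch (the batch sizes and delay are hypothetical values), dynamic batching with preferred batch sizes and a small queueing delay can be enabled with a config.pbtxt snippet like:
+
+```
+dynamic_batching {
+  preferred_batch_size: [ 4, 8 ]
+  max_queue_delay_microseconds: 100
+}
+```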
+
+#### Rate Limiter
+Rate limiter manages the rate at which requests are scheduled on model instances by Triton. The rate limiter operates across all models loaded in Triton to allow cross-model prioritization. [Learn more](user_guide/rate_limiter.md).
+
+#### Model Warmup
+For a few of the backends (check [Additional Resources](README.md#resources)), some or all of the initialization is deferred until the first inference request is received. The benefit is resource conservation, but it comes with the downside of the initial requests being processed slower than expected. Users can "warm up" the model ahead of time by instructing Triton to initialize it. [Learn more](user_guide/model_configuration.md#model-warmup).
+
+#### Inference Request/Response Cache
+Triton has a feature which allows inference responses to be cached. [Learn More](user_guide/response_cache.md).
+
+### Model Pipeline
+Building ensembles is as easy as adding an additional configuration file which outlines the specific flow of tensors from one model to another. Any additional changes required by the model ensemble can be made in the existing (individual) model configurations.
+- [Model Ensemble](user_guide/architecture.md#ensemble-models)
+- [Business Logic Scripting (BLS)](https://github.com/triton-inference-server/python_backend#business-logic-scripting)
+### Model Management
+Users can specify policies in the model configuration for loading and unloading of models. This [section](user_guide/model_management.md) covers the user-selectable policy details.
+- [Explicit Model Loading and Unloading](user_guide/model_management.md#model-control-mode-explicit)
+- [Modifying the Model Repository](user_guide/model_management.md#modifying-the-model-repository)
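+
+For example, when Triton runs with explicit model control enabled, models can be loaded and unloaded at runtime through the repository API (the model name and address below are illustrative):
+
+```
+$ curl -X POST localhost:8000/v2/repository/models/my_model/load
+$ curl -X POST localhost:8000/v2/repository/models/my_model/unload
+```
+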
+### Metrics
+Triton provides Prometheus metrics like GPU Utilization, Memory Usage, Latency and more. Learn about [available metrics](user_guide/metrics.md).
+### Framework Custom Operations
+Some frameworks provide the option of building custom layers/operations. These can be added to specific Triton Backends for those frameworks. [Learn more](user_guide/custom_operations.md)
+- [TensorRT](user_guide/custom_operations.md#tensorrt)
+- [TensorFlow](user_guide/custom_operations.md#tensorflow)
+- [PyTorch](user_guide/custom_operations.md#pytorch)
+- [ONNX](user_guide/custom_operations.md#onnx)
+### Client Libraries and Examples
+Use the [Triton Client](https://github.com/triton-inference-server/client) API to integrate client applications over the network via the HTTP/gRPC API, or integrate applications directly with Triton using CUDA shared memory to remove network overhead.
+- [C++ HTTP/GRPC Libraries](https://github.com/triton-inference-server/client#client-library-apis)
+- [Python HTTP/GRPC Libraries](https://github.com/triton-inference-server/client#client-library-apis)
+- [Java HTTP Library](https://github.com/triton-inference-server/client/tree/main/src/java)
+- GRPC Generated Libraries
+ - [go](https://github.com/triton-inference-server/client/tree/main/src/grpc_generated/go)
+ - [Java/Scala](https://github.com/triton-inference-server/client/tree/main/src/grpc_generated/java)
+ - [Javascript](https://github.com/triton-inference-server/client/tree/main/src/grpc_generated/javascript)
+- [Shared Memory Extension](protocol/extension_shared_memory.md)
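+
+As an illustrative sketch of the underlying HTTP/REST inference protocol (the model name, tensor name, shape, and data below are hypothetical), a request can also be sent with plain curl:
+
+```
+$ curl -X POST localhost:8000/v2/models/my_model/infer \
+    -H "Content-Type: application/json" \
+    -d '{"inputs": [{"name": "INPUT0", "shape": [1, 4], "datatype": "FP32", "data": [1.0, 2.0, 3.0, 4.0]}]}'
+```
+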
+### Cancelling Inference Requests
+Triton can detect and handle requests that have been cancelled from the client side. This [document](user_guide/request_cancellation.md) discusses the scope and limitations of the feature.
+### Performance Analysis
+Understanding inference performance is key to better resource utilization. Use Triton's tools to customize your deployment; a sample Perf Analyzer invocation follows the list below.
+- [Performance Tuning Guide](user_guide/performance_tuning.md)
+- [Optimization](user_guide/optimization.md)
+- [Model Analyzer](user_guide/model_analyzer.md)
+- [Performance Analyzer](https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/README.md)
+- [Inference Request Tracing](user_guide/trace.md)
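+
+For example, a typical Perf Analyzer sweep over client concurrency levels against a running server might look like this (the model name is illustrative):
+
+```
+$ perf_analyzer -m my_model -u localhost:8001 -i grpc --concurrency-range 1:4
+```
+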
+### Jetson and JetPack
+Triton can be deployed on edge devices. Explore [resources](user_guide/jetson.md) and [examples](examples/jetson/README.md).
+
+## **Resources**
+
+The following resources are recommended to explore the full suite of Triton Inference Server's functionalities.
+- **Clients**: Triton Inference Server comes with C++, Python and Java APIs with which users can send HTTP/REST or gRPC requests (with possible extensions for other languages). Explore the [client repository](https://github.com/triton-inference-server/server/tree/main/docs/protocol) for examples and documentation.
+
+- **Configuring Deployment**: Triton comes with three tools which can be used to configure deployment setting, measure performance and recommend optimizations.
+  - [Model Analyzer](https://github.com/triton-inference-server/model_analyzer): Model Analyzer is a CLI tool built to recommend deployment configurations for Triton Inference Server based on the user's Quality of Service requirements. It also generates detailed reports about model performance to summarize the benefits and trade-offs of different configurations.
+ - [Perf Analyzer](https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/README.md):
+ Perf Analyzer is a CLI application built to generate inference requests and
+    measure the latency of those requests and the throughput of the model being
+ served.
+ - [Model Navigator](https://github.com/triton-inference-server/model_navigator):
+    The Triton Model Navigator is a tool that automates the process of moving a model from source to the optimal format and configuration for deployment on Triton Inference Server. The tool supports exporting models from source to all possible formats and applies the Triton Inference Server backend optimizations.
+
+- **Backends**: Triton supports a wide variety of frameworks used to run models. Users can extend this functionality by creating custom backends.
+ - [PyTorch](https://github.com/triton-inference-server/pytorch_backend): Widely used Open Source DL Framework
+ - [TensorFlow](https://github.com/triton-inference-server/tensorflow_backend): Widely used Open Source DL Framework
+  - [TensorRT](https://github.com/triton-inference-server/tensorrt_backend): NVIDIA [TensorRT](https://developer.nvidia.com/tensorrt) is an inference acceleration SDK that provides a wide range of graph optimizations, kernel optimizations, use of lower precision, and more.
+ - [ONNX](https://github.com/triton-inference-server/onnxruntime_backend): ONNX Runtime is a cross-platform inference and training machine-learning accelerator.
+  - [OpenVINO](https://github.com/triton-inference-server/openvino_backend): OpenVINO™ is an open-source toolkit for optimizing and deploying AI inference.
+ - [Paddle Paddle](https://github.com/triton-inference-server/paddlepaddle_backend): Widely used Open Source DL Framework
+ - [Python](https://github.com/triton-inference-server/python_backend): Users can add custom business logic, or any Python code/model, for serving requests (a minimal sketch follows this list).
+ - [Forest Inference Library](https://github.com/triton-inference-server/fil_backend): Backend built for forest models trained by several popular machine learning frameworks (including XGBoost, LightGBM, Scikit-Learn, and cuML)
+ - [DALI](https://github.com/triton-inference-server/dali_backend): NVIDIA [DALI](https://developer.nvidia.com/dali) is a Data Loading Library purpose-built to accelerate the pre-processing and data loading steps in a deep learning pipeline.
+ - [HugeCTR](https://github.com/triton-inference-server/hugectr_backend): HugeCTR is a GPU-accelerated recommender framework designed to distribute training across multiple GPUs and nodes and estimate Click-Through Rates
+ - [Managed Stateful Models](https://github.com/triton-inference-server/stateful_backend): This backend automatically manages the input and output states of a model. The states are associated with a sequence id and need to be tracked for inference requests associated with the sequence id.
+ - [Faster Transformer](https://github.com/triton-inference-server/fastertransformer_backend): NVIDIA [FasterTransformer](https://github.com/NVIDIA/FasterTransformer/) (FT) is a library implementing an accelerated engine for the inference of transformer-based neural networks, with a special emphasis on large models, spanning many GPUs and nodes in a distributed manner.
+ - [Building Custom Backends](https://github.com/triton-inference-server/backend/tree/main/examples#tutorial)
+ - [Sample Custom Backend: Repeat_backend](https://github.com/triton-inference-server/repeat_backend): Backend built to demonstrate sending zero, one, or multiple responses per request.
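+
+As a concrete illustration of the client libraries above (a minimal sketch only), an inference request can be sent with the Python HTTP client as follows; the model name `my_model` and the tensor names/shapes are placeholders that must match your model configuration.
+
+```python
+import numpy as np
+import tritonclient.http as httpclient
+
+# Placeholder model/tensor names; adjust to your model configuration.
+client = httpclient.InferenceServerClient(url="localhost:8000")
+
+inputs = [httpclient.InferInput("INPUT0", [1, 16], "FP32")]
+inputs[0].set_data_from_numpy(np.ones((1, 16), dtype=np.float32))
+outputs = [httpclient.InferRequestedOutput("OUTPUT0")]
+
+result = client.infer(model_name="my_model", inputs=inputs, outputs=outputs)
+print(result.as_numpy("OUTPUT0"))
+```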
+
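+As a rough sketch of the Python backend item above: a Python backend model is a `model.py` that defines a `TritonPythonModel` class and runs inside Triton (the `triton_python_backend_utils` module is only available there); the tensor names are placeholders.
+
+```python
+# model.py -- minimal Python backend sketch (placeholder tensor names)
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+    def execute(self, requests):
+        responses = []
+        for request in requests:
+            # Echo INPUT0 back as OUTPUT0; a real model would add its own logic here.
+            in0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
+            out0 = pb_utils.Tensor("OUTPUT0", in0.as_numpy())
+            responses.append(pb_utils.InferenceResponse(output_tensors=[out0]))
+        return responses
+```
+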
+## **Customization Guide**
+This guide describes how to build and test Triton and also how Triton can be extended with new functionality.
+
+- [Build](customization_guide/build.md)
+- [Protocols and APIs](customization_guide/inference_protocols.md)
+- [Backends](https://github.com/triton-inference-server/backend)
+- [Repository Agents](customization_guide/repository_agents.md)
+- [Test](customization_guide/test.md)
diff --git a/docs/_reference/tritonclient_api.rst b/docs/_reference/tritonclient_api.rst
new file mode 100644
index 0000000000..33dd53127a
--- /dev/null
+++ b/docs/_reference/tritonclient_api.rst
@@ -0,0 +1,10 @@
+Python tritonclient Package API
+===============================
+
+The tritonclient Python package is hosted on `pypi.org `_. This package documentation for tritonclient is generated by the Sphinx autosummary extension.
+
+.. autosummary::
+ :toctree: tritonclient
+ :recursive:
+
+ tritonclient
diff --git a/docs/_static/.gitattributes b/docs/_static/.gitattributes
new file mode 100644
index 0000000000..04865f126a
--- /dev/null
+++ b/docs/_static/.gitattributes
@@ -0,0 +1,2 @@
+nvidia-logo-horiz-rgb-blk-for-screen.png filter=lfs diff=lfs merge=lfs -text
+nvidia-logo-vert-rgb-blk-for-screen.png filter=lfs diff=lfs merge=lfs -text
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
new file mode 100644
index 0000000000..46bab57d4e
--- /dev/null
+++ b/docs/_static/custom.css
@@ -0,0 +1,319 @@
+/*
+# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+@font-face {
+ font-family: "NVIDIA Sans";
+ src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/5/2/52891dda673228d54e5d57bf1e4a3880d4b22405.woff2) format("woff2"),
+ url(https://aws1.discourse-cdn.com/nvidia/original/3X/e/0/e090b7dda7a582522c7f9045c6ce949cce60134f.woff) format("woff");
+ font-weight: 300;
+ font-style: normal;
+}
+@font-face {
+ font-family: "NVIDIA Sans";
+ src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/a/1/a107baabcbf6b241099122336bce7429bcfd377a.woff2) format("woff2"),
+ url(https://aws1.discourse-cdn.com/nvidia/original/3X/3/a/3a6060a4e3bce70e5552ba0de8af4b22c6cf9144.woff) format("woff");
+ font-weight: 300;
+ font-style: italic;
+}
+@font-face {
+ font-family: "NVIDIA Sans";
+ src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/9/9/9920d2b172b01d92fc9c1c0e521dcf45b59c47c3.woff2) format("woff2"),
+ url(https://aws1.discourse-cdn.com/nvidia/original/3X/6/c/6c7d947928a7e4ef3e80ed409bef6c243f2148cb.woff) format("woff");
+ font-weight: 400;
+ font-style: normal;
+}
+@font-face {
+ font-family: "NVIDIA Sans";
+ src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/e/8/e8e63fe1244372cd942d957f44a5616a1eba0644.woff2) format("woff2"),
+ url(https://aws1.discourse-cdn.com/nvidia/original/3X/0/f/0f1fb2af0283ab09d36e7097bb07d895c3228f12.woff) format("woff");
+ font-weight: 400;
+ font-style: italic;
+}
+@font-face {
+ font-family: "NVIDIA Sans";
+ src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/7/9/79d3c513a9cd72c59f65354f39f89ca52dc17dd2.woff2) format("woff2"),
+ url(https://aws1.discourse-cdn.com/nvidia/original/3X/2/5/2581ac533f5d01f4985d8a7245b0766b4630ced8.woff) format("woff");
+ font-weight: 500;
+ font-style: normal;
+}
+@font-face {
+ font-family: "NVIDIA Sans";
+ src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/3/9/39d9ef1ee9770dd503f19bb2ace2fdb4eff3bb50.woff2) format("woff2"),
+ url(https://aws1.discourse-cdn.com/nvidia/original/3X/7/b/7bb5d5e2e71b2e13c8098b2e67c0a0ed9258e6c7.woff) format("woff");
+ font-weight: 500;
+ font-style: italic;
+}
+@font-face {
+ font-family: "NVIDIA Sans";
+ src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/0/5/05276a55a43eb3f74981ec1e93252727afcd9d16.woff2) format("woff2"),
+ url(https://aws1.discourse-cdn.com/nvidia/original/3X/9/c/9cfec7ed941b06564aa4d5ca14610e81542d070f.woff) format("woff");
+ font-weight: 700;
+ font-style: normal;
+}
+@font-face {
+ font-family: "NVIDIA Sans";
+ src: url(https://aws1.discourse-cdn.com/nvidia/original/3X/a/e/aebd14d09ba56f541e1b8735fb051e33710f9ae7.woff2) format("woff2"),
+ url(https://aws1.discourse-cdn.com/nvidia/original/3X/e/d/edbdabef43acc5c12e84a94baaa5542c9404cfeb.woff) format("woff");
+ font-weight: 700;
+ font-style: italic;
+}
+
+/* Custom Styles */
+:root {
+--pst-font-size-base: none;
+--pst-color-primary: 0, 133, 197;
+--pst-color-admonition-note: var(--pst-color-primary);
+--pst-color-admonition-default: var(--pst-color-primary);
+--pst-color-info: 255, 193, 7;
+--pst-color-admonition-tip: var(--pst-color-info);
+--pst-color-admonition-hint: var(--pst-color-info);
+--pst-color-admonition-important: var(--pst-color-info);
+--pst-color-warning: 245, 162, 82;
+--pst-color-danger: 230, 101, 129;
+--pst-color-admonition-warning: var(--pst-color-danger);
+--pst-color-link: 118, 185, 0;
+--pst-color-inline-code: 92, 22, 130;
+--font-family-sans-serif: NVIDIA Sans, Helvetica, Arial, Sans-serif;
+--pst-font-family-base-system: NVIDIA Sans, Helvetica, Arial, Sans-serif;
+font-family: NVIDIA Sans, Helvetica, Arial, Sans-serif;
+}
+
+.prev-next-area {
+ font-size: small;
+}
+
+.docutils caption {
+ caption-side: top;
+}
+
+#site-navigation h1.site-logo {
+ font-size: 0.85em;
+}
+
+/* colors
+nv green 118,185,0
+black 0, 0, 0
+light gray 205, 205, 205
+medium gray 140, 140, 140
+dark gray 94, 94, 94
+
+emerald 0, 133, 100
+emerald #008564
+amethyst 92, 22, 130
+amethyst #5C1682
+cpu blue 0, 133, 197
+cpu blue #0085C5
+garnet 137, 12, 88
+garnet 890C58
+fluorite 250, 194, 0
+fluorite FAC200
+*/
+
+:root {
+ --nv-green: #76b900;
+ --nv-green-darken: #6ead00;
+ --emerald: #008564;
+ --emerald-darken: #017c5d;
+ --amethyst: #5d1682;
+ --amethyst-darken: #4c116b;
+ --cpu-blue: #0071c5;
+ --cpu-blue-darken: #0062ad;
+ --garnet: #890c58;
+ --garnet-darken: #7a0c4e;
+ --fluorite: #fac200;
+ --fluorite-darken: #e4b301;
+ --dark-gray: #5e5e5e;
+ --light-gray: #cdcdcd;
+ --medium-gray: #8c8c8c;
+ --medium-gray-darken: #8c8c8cde;
+ --primary: #76b900;
+ --secondary: #008564;
+ --success: #5d1682;
+ --info: #0071c5;
+ --warning: #fac200;
+ --danger: #890c58;
+}
+
+/* Riva TBYB (ASR and TTS) Styling */
+.demo-box {
+ background-color: rgb(245,245,245);
+}
+a:link { text-decoration: none; }
+.scrollable {
+ height: 125px;
+ overflow-y: auto;
+ font-size: 1.3rem;
+}
+.dot {
+ height: 8px;
+ width: 8px;
+ background-color: rgb(228, 77, 77);
+ border-radius: 50%;
+ display: inline-block;
+}
+.timer {
+ font-size: 80%;
+ text-transform: uppercase;
+ white-space: nowrap;
+}
+.form-select {
+ border-radius: 0%;
+ font-size: 80%;
+}
+.form-control {
+ border-radius: 0%;
+}
+.input-group-text {
+ border-radius: 0%;
+ font-size: 80%;
+ text-transform: uppercase;
+ background-color: rgb(245,245,245);
+}
+.card {
+ border-radius: 0%;
+}
+.speech-control {
+ border-top-width: 0px;
+}
+.btn {
+ border-radius: 0%;
+ font-size: 80%;
+ text-transform: uppercase;
+ white-space: nowrap;
+ min-width: 125px;
+}
+.btn-primary {
+ background-color: var(--nv-green);
+ border-color: var(--nv-green);
+}
+.btn-primary:hover {
+ background-color: var(--nv-green-darken);
+ border-color: var(--nv-green-darken);
+}
+.btn-primary:focus, .btn-primary.focus {
+ background-color: var(--nv-green-darken);
+ border-color: var(--nv-green-darken);
+ -webkit-box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
+ box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
+}
+.btn-primary.disabled, .btn-primary:disabled {
+ background-color: var(--nv-green);
+ border-color: var(--nv-green);
+}
+.btn-primary:not(:disabled):not(.disabled):active, .btn-primary:not(:disabled):not(.disabled).active,
+.show > .btn-primary.dropdown-toggle {
+ background-color: var(--nv-green-darken);
+ border-color: var(--nv-green-darken);
+}
+.btn-primary:not(:disabled):not(.disabled):active:focus, .btn-primary:not(:disabled):not(.disabled).active:focus,
+.show > .btn-primary.dropdown-toggle:focus {
+ -webkit-box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
+ box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
+}
+.btn-secondary {
+ background-color: var(--medium-gray);
+ border-color: var(--medium-gray);
+}
+.btn-secondary:hover {
+ background-color: var(--medium-gray-darken);
+ border-color: var(--medium-gray-darken);
+}
+.btn-secondary:focus, .btn-secondary.focus {
+ background-color: var(--medium-gray-darken);
+ border-color: var(--medium-gray-darken);
+ -webkit-box-shadow: 0 0 0 0.2rem rgba(140, 140, 140, 0.5);
+ box-shadow: 0 0 0 0.2rem rgba(140, 140, 140, 0.5);
+}
+.btn-secondary.disabled, .btn-secondary:disabled {
+ background-color: var(--medium-gray);
+ border-color: var(--medium-gray);
+}
+.btn-secondary:not(:disabled):not(.disabled):active, .btn-secondary:not(:disabled):not(.disabled).active,
+.show > .btn-secondary.dropdown-toggle {
+ background-color: var(--medium-gray-darken);
+ border-color: var(--medium-gray-darken);
+}
+.btn-secondary:not(:disabled):not(.disabled):active:focus, .btn-secondary:not(:disabled):not(.disabled).active:focus,
+.show > .btn-secondary.dropdown-toggle:focus {
+ -webkit-box-shadow: 0 0 0 0.2rem rgba(140, 140, 140, 0.5);
+ box-shadow: 0 0 0 0.2rem rgba(140, 140, 140, 0.5);
+}
+.btn-link {
+ color: var(--nv-green);
+ text-decoration-line: none;
+}
+.btn-link:hover {
+ color: var(--nv-green-darken);
+}
+.btn-link:focus, .btn-link.focus {
+ color: var(--nv-green-darken);
+ -webkit-box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
+ box-shadow: 0 0 0 0.2rem rgba(147, 173, 102, 0.5);
+}
+.link-primary {
+ color: var(--nv-green);
+}
+.link-primary:hover {
+ color: var(--nv-green-darken);
+}
+
+/* Riva ASR Styles */
+#riva-upload-label {
+ margin-top: 0.5rem;
+}
+
+/* Riva TTS Styles */
+.tts-control {
+ justify-content: space-between;
+ align-items: center;
+}
+
+.tts-control > p {
+ margin: unset;
+}
+
+#riva-tts-field {
+ resize: none;
+ border: unset;
+ padding: 0;
+ height: 100%;
+ font-size: 1.0rem;
+}
+
+#riva-terms-of-use p {
+ max-width: 620px;
+}
+
+/* Media Queries */
+@media (max-width: 1024px) {
+
+ /* Riva TTS and ASR */
+ .scrollable {
+ height: 250px;
+ }
+}
+
diff --git a/docs/_static/logo_2color_horizontal.svg b/docs/_static/logo_2color_horizontal.svg
new file mode 100644
index 0000000000..5ab0442d32
--- /dev/null
+++ b/docs/_static/logo_2color_horizontal.svg
@@ -0,0 +1,2 @@
+
+
diff --git a/docs/_static/logo_2color_vertical.svg b/docs/_static/logo_2color_vertical.svg
new file mode 100644
index 0000000000..69e64b7001
--- /dev/null
+++ b/docs/_static/logo_2color_vertical.svg
@@ -0,0 +1,2 @@
+
+
diff --git a/docs/_static/nvidia-logo-horiz-rgb-blk-for-screen.png b/docs/_static/nvidia-logo-horiz-rgb-blk-for-screen.png
new file mode 100644
index 0000000000..6316a9340f
--- /dev/null
+++ b/docs/_static/nvidia-logo-horiz-rgb-blk-for-screen.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd57ffce985e08c97c6af5fdadd2a28e4a92996455edc2d0598dd964cca51eae
+size 48928
diff --git a/docs/_static/nvidia-logo-vert-rgb-blk-for-screen.png b/docs/_static/nvidia-logo-vert-rgb-blk-for-screen.png
new file mode 100644
index 0000000000..5546c1b57d
--- /dev/null
+++ b/docs/_static/nvidia-logo-vert-rgb-blk-for-screen.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17a25111e145aa52b77ec5a89eb3b0c7d9a2a90dea25a0bb867a937514fc783c
+size 63541
diff --git a/docs/_static/rtd-data.js b/docs/_static/rtd-data.js
new file mode 100644
index 0000000000..7ed13e8ee0
--- /dev/null
+++ b/docs/_static/rtd-data.js
@@ -0,0 +1,36 @@
+/*
+# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+// Dummy data for testing ReadTheDocs footer insertion
+// This mimics RTD data for a project that uses both versions + languages
+var READTHEDOCS_DATA = {
+ project: "frc-docs",
+ version: "latest",
+ language: "en",
+ proxied_api_host: "https://readthedocs.org",
+};
diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html
new file mode 100644
index 0000000000..570aba8ba3
--- /dev/null
+++ b/docs/_templates/layout.html
@@ -0,0 +1,31 @@
+
+{% extends "!layout.html" %}
+{%- block footer %}
+
+{%- endblock %}
diff --git a/docs/architecture.rst b/docs/architecture.rst
deleted file mode 100644
index 75da5b0574..0000000000
--- a/docs/architecture.rst
+++ /dev/null
@@ -1,118 +0,0 @@
-..
- # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions
- # are met:
- # * Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- # * Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- # * Neither the name of NVIDIA CORPORATION nor the names of its
- # contributors may be used to endorse or promote products derived
- # from this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-Architecture
-============
-
-The following figure shows the TensorRT Inference Server high-level
-architecture. The :ref:`model repository `
-is a file-system based store of the models that TRTIS will make
-available for inferencing. Inference requests arrive at the server via
-either :ref:`HTTP or GRPC ` and are then
-routed to the appropriate per-model scheduler queue. The scheduler
-performs fair scheduling and dynamic batching for each model’s
-requests. The schedule passes each request to the framework backend
-corresponding to the model type. The framework backend performs
-inferencing using the inputs provided in the request to produce the
-requested outputs. The outputs are then formatted and a response is
-sent.
-
-.. image:: images/arch.png
-
-.. _section-concurrent-model-execution:
-
-Concurrent Model Execution
---------------------------
-
-The TRTIS architecture allows multiple models and/or multiple
-instances of the same model to execute in parallel on a single
-GPU. The following figure shows an example with two models; model0 and
-model1. Assuming TRTIS is not currently processing any request, when
-two requests arrive simultaneously, one for each model, TRTIS
-immediately schedules both of them onto the GPU and the GPU’s hardware
-scheduler begins working on both computations in parallel.
-
-.. image:: images/multi_model_exec.png
-
-By default, if multiple requests for the same model arrive at the same
-time, TRTIS will serialize their execution by scheduling only one at a
-time on the GPU, as shown in the following figure.
-
-.. image:: images/multi_model_serial_exec.png
-
-The TensorRT inference server provides an :ref:`instance-group
-` feature that allows each model to specify
-how many parallel executions of that model should be allowed. Each
-such enabled parallel execution is referred to as an *execution
-instance*. By default, TRTIS gives each model a single execution
-instance, which means that only a single execution of the model is
-allowed to be in progress at a time as shown in the above figure. By
-using instance-group the number of execution instances for a model can
-be increased. The following figure shows model execution when model1
-is configured to allow three execution instances. As shown in the
-figure, the first three model1 inference requests are immediately
-executed in parallel on the GPU. The fourth model1 inference request
-must wait until one of the first three executions completes before
-beginning.
-
-.. image:: images/multi_model_parallel_exec.png
-
-To provide the current model execution capabilities shown in the above
-figures, TRTIS uses `CUDA streams
-`_
-to exploit the GPU’s hardware scheduling capabilities. CUDA streams
-allow TRTIS to communicate independent sequences of memory-copy and
-kernel executions to the GPU. The hardware scheduler in the GPU takes
-advantage of the independent execution streams to fill the GPU with
-independent memory-copy and kernel executions. For example, using
-streams allows the GPU to execute a memory-copy for one model, a
-kernel for another model, and a different kernel for yet another model
-at the same time.
-
-The following figure shows some details of how this works within the
-TensorRT Inference Server. Each framework backend (TensorRT,
-TensorFlow, Caffe2) provides an API for creating an execution context
-that is used to execute a given model (each framework uses different
-terminology for this concept but here we refer to them generally as
-execution contexts). Each framework allows an execution context to be
-associated with a CUDA stream. This CUDA stream is used by the
-framework to execute all memory copies and kernels needed for the
-model associated with the execution context. For a given model, TRTIS
-creates one execution context for each execution instance specified
-for the model. When an inference request arrives for a given model,
-that request is queued in the model scheduler associated with that
-model. The model scheduler waits for any execution context associated
-with that model to be idle and then sends the queued request to the
-context. The execution context then issues all the memory copies and
-kernel executions required to execute the model to the CUDA stream
-associated with that execution context. The memory copies and kernels
-in each CUDA stream are independent of memory copies and kernels in
-other CUDA streams. The GPU hardware scheduler looks across all CUDA
-streams to find independent memory copies and kernels to execute on
-the GPU.
-
-.. image:: images/cuda_stream_exec.png
diff --git a/docs/build.rst b/docs/build.rst
deleted file mode 100644
index 9e9d7f1ce5..0000000000
--- a/docs/build.rst
+++ /dev/null
@@ -1,121 +0,0 @@
-..
- # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions
- # are met:
- # * Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- # * Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- # * Neither the name of NVIDIA CORPORATION nor the names of its
- # contributors may be used to endorse or promote products derived
- # from this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-Building
-========
-
-The TensorRT Inference Server is built using Docker and the TensorFlow
-and PyTorch containers from `NVIDIA GPU Cloud (NGC)
-`_. Before building you must install Docker
-and nvidia-docker and login to the NGC registry by following the
-instructions in :ref:`section-installing-prebuilt-containers`.
-
-.. _section-building-the-server:
-
-Building the Server
--------------------
-
-To build a release version of the TRTIS container, change directory to
-the root of the repo and issue the following command::
-
- $ docker build --pull -t tensorrtserver .
-
-Incremental Builds
-^^^^^^^^^^^^^^^^^^
-
-For typical development you will want to run the *build* container
-with your local repo’s source files mounted so that your local changes
-can be incrementally built. This is done by first building the
-*tensorrtserver_build* container::
-
- $ docker build --pull -t tensorrtserver_build --target trtserver_build .
-
-By mounting /path/to/tensorrtserver/src into the container at
-/workspace/src, changes to your local repo will be reflected in the
-container::
-
- $ nvidia-docker run -it --rm -v/path/to/tensorrtserver/src:/workspace/src tensorrtserver_build
-
-Within the container you can perform an incremental server build
-with::
-
- # cd /workspace
- # bazel build -c opt --config=cuda src/servers/trtserver
- # cp /workspace/bazel-bin/src/servers/trtserver /opt/tensorrtserver/bin/trtserver
-
-Similarly, within the container you can perform an incremental build
-of the C++ and Python client libraries and example executables with::
-
- # cd /workspace
- # bazel build -c opt --config=cuda src/clients/…
- # mkdir -p /opt/tensorrtserver/bin
- # cp bazel-bin/src/clients/c++/image_client /opt/tensorrtserver/bin/.
- # cp bazel-bin/src/clients/c++/perf_client /opt/tensorrtserver/bin/.
- # cp bazel-bin/src/clients/c++/simple_client /opt/tensorrtserver/bin/.
- # mkdir -p /opt/tensorrtserver/lib
- # cp bazel-bin/src/clients/c++/librequest.so /opt/tensorrtserver/lib/.
- # cp bazel-bin/src/clients/c++/librequest.a /opt/tensorrtserver/lib/.
- # mkdir -p /opt/tensorrtserver/pip
- # bazel-bin/src/clients/python/build_pip /opt/tensorrtserver/pip/.
-
-Some source changes seem to cause bazel to get confused and not
-correctly rebuild all required sources. You can force bazel to rebuild
-all of the TRTIS source without requiring a complete rebuild of the
-TensorFlow and Caffe2 components by doing the following before issuing
-the above build command::
-
- # rm -fr bazel-bin/src
-
-.. include:: client.rst
- :start-after: build-client-begin-marker-do-not-remove
- :end-before: build-client-end-marker-do-not-remove
-
-Building the Documentation
---------------------------
-
-The TRTIS documentation is found in the docs/ directory and is based
-on `Sphinx `_. `Doxygen
-`_ integrated with `Exhale
-`_ is used for C++ API
-docuementation.
-
-To build the docs install the required dependencies::
-
- $ apt-get update
- $ apt-get install -y --no-install-recommends doxygen
- $ pip install --upgrade sphinx sphinx-rtd-theme nbsphinx exhale
-
-To get the Python client library API docs the TensorRT Inference
-Server Python package must be installed::
-
- $ pip install --upgrade tensorrtserver-*.whl
-
-Then use Sphinx to build the documentation into the build/html
-directory::
-
- $ cd docs
- $ make clean html
diff --git a/docs/client.rst b/docs/client.rst
deleted file mode 100644
index 0335b32cbd..0000000000
--- a/docs/client.rst
+++ /dev/null
@@ -1,383 +0,0 @@
-..
- # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions
- # are met:
- # * Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- # * Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- # * Neither the name of NVIDIA CORPORATION nor the names of its
- # contributors may be used to endorse or promote products derived
- # from this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-.. _section-client-libraries-and-examples:
-
-Client Libraries and Examples
-=============================
-
-The TRTIS *client libraries* make it easy to communicate with the
-TensorRT Inference Server from you C++ or Python application. Using
-these libraries you can send either HTTP or GRPC requests to TRTIS to
-check server status or health and to make inference requests.
-
-A couple of example applications show how to use the client libraries
-to perform image classification and to test performance:
-
-* C++ and Python versions of *image\_client*, an example application
- that uses the C++ or Python client library to execute image
- classification models on the TensorRT Inference Server.
-
-* Python version of *grpc\_image\_client*, an example application that
- is functionally equivalent to *image\_client* but that uses GRPC
- generated client code to communicate with TRTIS (instead of the
- client library).
-
-* C++ version of *perf\_client*, an example application that issues a
- large number of concurrent requests to TRTIS to measure latency and
- throughput for a given model. You can use this to experiment with
- different model configuration settings for your models.
-
-.. build-client-begin-marker-do-not-remove
-
-.. _section-building-the-client-libraries-and-examples:
-
-Building the Client Libraries and Examples
-------------------------------------------
-
-The provided Dockerfile can be used to build just the client libraries
-and examples. Issue the following command to build the C++ client
-library, C++ and Python examples, and a Python wheel file for the
-Python client library::
-
- $ docker build -t tensorrtserver_clients --target trtserver_build --build-arg "PYVER=" --build-arg "BUILD_CLIENTS_ONLY=1" .
-
-The -\\-build-arg setting PYVER is optional and can be used to set the
-Python version that you want the Python client library built for (the
-default is 3.5).
-
-After the build completes, the easiest way to extract the built
-libraries and examples from the docker image is to mount a host
-directory and then copy them out from within the container::
-
- $ docker run -it --rm -v/tmp:/tmp/host tensorrtserver_clients
- # cp /opt/tensorrtserver/bin/image_client /tmp/host/.
- # cp /opt/tensorrtserver/bin/perf_client /tmp/host/.
- # cp /opt/tensorrtserver/bin/simple_client /tmp/host/.
- # cp /opt/tensorrtserver/pip/tensorrtserver-*.whl /tmp/host/.
- # cp /opt/tensorrtserver/lib/librequest.* /tmp/host/.
-
-You can now access the files from /tmp on the host system. To run the
-C++ examples you must install some dependencies on your host system::
-
- $ apt-get install curl libcurl3-dev libopencv-dev libopencv-core-dev python-pil
-
-To run the Python examples you will need to additionally install the
-client whl file and some other dependencies::
-
- $ apt-get install python3 python3-pip
- $ pip3 install --user --upgrade tensorrtserver-*.whl pillow
-
-.. build-client-end-marker-do-not-remove
-
-.. _section-image_classification_example:
-
-Image Classification Example Application
-----------------------------------------
-
-The image classification example that uses the C++ client API is
-available at `src/clients/c++/image\_client.cc
-`_. The
-Python version of the image classification client is available at
-`src/clients/python/image\_client.py
-`_.
-
-To use image\_client (or image\_client.py) you must first have a
-running TRTIS that is serving one or more image classification
-models. The image\_client application requires that the model have a
-single image input and produce a single classification output. If you
-don't have a model repository with image classification models see
-:ref:`section-example-model-repository` for instructions on how to
-create one.
-
-Follow the instructions in :ref:`section-running-the-inference-server`
-to launch TRTIS using the model repository. Once the server is running
-you can use the image\_client application to send inference requests
-to the server. You can specify a single image or a directory holding
-images. Here we send a request for the resnet50_netdef model from the
-:ref:`example model repository ` for
-an image from the `qa/images
-`_
-directory::
-
- $ image_client -m resnet50_netdef -s INCEPTION qa/images/mug.jpg
- Request 0, batch size 1
- Image '../qa/images/mug.jpg':
- 504 (COFFEE MUG) = 0.723991
-
-The Python version of the application accepts the same command-line
-arguments::
-
- $ src/clients/python/image_client.py -m resnet50_netdef -s INCEPTION qa/images/mug.jpg
- Request 0, batch size 1
- Image '../qa/images/mug.jpg':
- 504 (COFFEE MUG) = 0.778078556061
-
-The image\_client and image\_client.py applications use the TRTIS
-client library to talk to the server. By default image\_client
-instructs the client library to use HTTP protocol to talk to TRTIS,
-but you can use GRPC protocol by providing the \-i flag. You must also
-use the \-u flag to point at the GRPC endpoint on TRTIS::
-
- $ image_client -i grpc -u localhost:8001 -m resnet50_netdef -s INCEPTION qa/images/mug.jpg
- Request 0, batch size 1
- Image '../qa/images/mug.jpg':
- 504 (COFFEE MUG) = 0.723991
-
-By default the client prints the most probable classification for the
-image. Use the \-c flag to see more classifications::
-
- $ image_client -m resnet50_netdef -s INCEPTION -c 3 qa/images/mug.jpg
- Request 0, batch size 1
- Image '../qa/images/mug.jpg':
- 504 (COFFEE MUG) = 0.723991
- 968 (CUP) = 0.270953
- 967 (ESPRESSO) = 0.00115996
-
-The \-b flag allows you to send a batch of images for inferencing.
-The image\_client application will form the batch from the image or
-images that you specified. If the batch is bigger than the number of
-images then image\_client will just repeat the images to fill the
-batch::
-
- $ image_client -m resnet50_netdef -s INCEPTION -c 3 -b 2 qa/images/mug.jpg
- Request 0, batch size 2
- Image '../qa/images/mug.jpg':
- 504 (COFFEE MUG) = 0.778078556061
- 968 (CUP) = 0.213262036443
- 967 (ESPRESSO) = 0.00293014757335
- Image '../qa/images/mug.jpg':
- 504 (COFFEE MUG) = 0.778078556061
- 968 (CUP) = 0.213262036443
- 967 (ESPRESSO) = 0.00293014757335
-
-Provide a directory instead of a single image to perform inferencing
-on all images in the directory::
-
- $ image_client -m resnet50_netdef -s INCEPTION -c 3 -b 2 qa/images
- Request 0, batch size 2
- Image '../qa/images/car.jpg':
- 817 (SPORTS CAR) = 0.836187
- 511 (CONVERTIBLE) = 0.0708251
- 751 (RACER) = 0.0597549
- Image '../qa/images/mug.jpg':
- 504 (COFFEE MUG) = 0.723991
- 968 (CUP) = 0.270953
- 967 (ESPRESSO) = 0.00115996
- Request 1, batch size 2
- Image '../qa/images/vulture.jpeg':
- 23 (VULTURE) = 0.992326
- 8 (HEN) = 0.00231854
- 84 (PEACOCK) = 0.00201471
- Image '../qa/images/car.jpg':
- 817 (SPORTS CAR) = 0.836187
- 511 (CONVERTIBLE) = 0.0708251
- 751 (RACER) = 0.0597549
-
-The grpc\_image\_client.py application at available at
-`src/clients/python/grpc\_image\_client.py
-`_
-behaves the same as the image\_client except that instead of using the
-TRTIS client library it uses the GRPC generated client library to
-communicate with TRTIS.
-
-Performance Example Application
--------------------------------
-
-The perf\_client example application located at
-`src/clients/c++/perf\_client.cc
-`_
-uses the C++ client API to send concurrent requests to TRTIS to
-measure latency and inferences per second under varying client loads.
-
-To use perf\_client you must first have a running TRTIS that is
-serving one or more models. The perf\_client application works with
-any type of model by sending random data for all input tensors and by
-reading and ignoring all output tensors. If you don't have a model
-repository see :ref:`section-example-model-repository` for
-instructions on how to create one.
-
-Follow the instructions in :ref:`section-running-the-inference-server`
-to launch TRTIS using the model repository.
-
-The perf\_client application has two major modes. In the first mode
-you specify how many concurrent clients you want to simulate and
-perf\_client finds a stable latency and inferences/second for that
-level of concurrency. Use the \-t flag to control concurrency and \-v
-to see verbose output. The following example simulates four clients
-continuously sending requests to TRTIS::
-
- $ perf_client -m resnet50_netdef -p3000 -t4 -v
- *** Measurement Settings ***
- Batch size: 1
- Measurement window: 3000 msec
-
- Request concurrency: 4
- Pass [1] throughput: 207 infer/sec. Avg latency: 19268 usec (std 910 usec)
- Pass [2] throughput: 206 infer/sec. Avg latency: 19362 usec (std 941 usec)
- Pass [3] throughput: 208 infer/sec. Avg latency: 19252 usec (std 841 usec)
- Client:
- Request count: 624
- Throughput: 208 infer/sec
- Avg latency: 19252 usec (standard deviation 841 usec)
- Avg HTTP time: 19224 usec (send 714 usec + response wait 18486 usec + receive 24 usec)
- Server:
- Request count: 749
- Avg request latency: 17886 usec (overhead 55 usec + queue 26 usec + compute 17805 usec)
-
-In the second mode perf\_client will generate an inferences/second
-vs. latency curve by increasing concurrency until a specific latency
-limit or concurrency limit is reached. This mode is enabled by using
-the \-d option and \-l to specify the latency limit and optionally the
-\-c to specify a maximum concurrency limit::
-
- $ perf_client -m resnet50_netdef -p3000 -d -l50 -c 3
- *** Measurement Settings ***
- Batch size: 1
- Measurement window: 3000 msec
- Latency limit: 50 msec
- Concurrency limit: 3 concurrent requests
-
- Request concurrency: 1
- Client:
- Request count: 327
- Throughput: 109 infer/sec
- Avg latency: 9191 usec (standard deviation 822 usec)
- Avg HTTP time: 9188 usec (send/recv 1007 usec + response wait 8181 usec)
- Server:
- Request count: 391
- Avg request latency: 7661 usec (overhead 90 usec + queue 68 usec + compute 7503 usec)
-
- Request concurrency: 2
- Client:
- Request count: 521
- Throughput: 173 infer/sec
- Avg latency: 11523 usec (standard deviation 616 usec)
- Avg HTTP time: 11448 usec (send/recv 711 usec + response wait 10737 usec)
- Server:
- Request count: 629
- Avg request latency: 10018 usec (overhead 70 usec + queue 41 usec + compute 9907 usec)
-
- Request concurrency: 3
- Client:
- Request count: 580
- Throughput: 193 infer/sec
- Avg latency: 15518 usec (standard deviation 635 usec)
- Avg HTTP time: 15487 usec (send/recv 779 usec + response wait 14708 usec)
- Server:
- Request count: 697
- Avg request latency: 14083 usec (overhead 59 usec + queue 30 usec + compute 13994 usec)
-
- Inferences/Second vs. Client Average Batch Latency
- Concurrency: 1, 109 infer/sec, latency 9191 usec
- Concurrency: 2, 173 infer/sec, latency 11523 usec
- Concurrency: 3, 193 infer/sec, latency 15518 usec
-
-Use the \-f flag to generate a file containing CSV output of the
-results::
-
- $ perf_client -m resnet50_netdef -p3000 -d -l50 -c 3 -f perf.csv
-
-You can then import the CSV file into a spreadsheet to help visualize
-the latency vs inferences/second tradeoff as well as see some
-components of the latency. Follow these steps:
-
-- Open `this spreadsheet `_
-- Make a copy from the File menu "Make a copy..."
-- Open the copy
-- Select the A2 cell
-- From the File menu select "Import..."
-- Select "Upload" and upload the file
-- Select "Replace data at selected cell" and then select the "Import data" button
-
-.. _section-client-api:
-
-Client API
-----------
-
-The C++ client API exposes a class-based interface for querying server
-and model status and for performing inference. The commented interface
-is available at `src/clients/c++/request.h
-`_
-and in the API Reference.
-
-The Python client API provides similar capabilities as the C++
-API. The commented interface is available at
-`src/clients/python/\_\_init\_\_.py
-`_
-and in the API Reference.
-
-A very simple C++ example application at
-`src/clients/c++/simple\_client.cc
-`_
-and a Python version at `src/clients/python/simple\_client.py
-`_
-demonstrate basic client API usage.
-
-To run the the C++ version of the simple example, first build as
-described in :ref:`section-building-the-client-libraries-and-examples`
-and then::
-
- $ simple_client
- 0 + 1 = 1
- 0 - 1 = -1
- 1 + 1 = 2
- 1 - 1 = 0
- 2 + 1 = 3
- 2 - 1 = 1
- 3 + 1 = 4
- 3 - 1 = 2
- 4 + 1 = 5
- 4 - 1 = 3
- 5 + 1 = 6
- 5 - 1 = 4
- 6 + 1 = 7
- 6 - 1 = 5
- 7 + 1 = 8
- 7 - 1 = 6
- 8 + 1 = 9
- 8 - 1 = 7
- 9 + 1 = 10
- 9 - 1 = 8
- 10 + 1 = 11
- 10 - 1 = 9
- 11 + 1 = 12
- 11 - 1 = 10
- 12 + 1 = 13
- 12 - 1 = 11
- 13 + 1 = 14
- 13 - 1 = 12
- 14 + 1 = 15
- 14 - 1 = 13
- 15 + 1 = 16
- 15 - 1 = 14
-
-To run the the Python version of the simple example, first build as
-described in :ref:`section-building-the-client-libraries-and-examples`
-and install the tensorrtserver whl, then::
-
- $ python src/clients/python/simple_client.py
diff --git a/docs/conf.py b/docs/conf.py
old mode 100644
new mode 100755
index 917cb7ea71..564ff3e1af
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,4 +1,6 @@
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+#!/usr/bin/env python3
+
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -24,13 +26,11 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-# -*- coding: utf-8 -*-
-#
# Configuration file for the Sphinx documentation builder.
#
-# This file does only contain a selection of the most common options. For a
-# full list see the documentation:
-# http://www.sphinx-doc.org/en/master/config
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
@@ -38,223 +38,242 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('..'))
-from builtins import str
import os
-import re
-import sphinx_rtd_theme
-import subprocess
-import textwrap
-
-# -- Project information -----------------------------------------------------
-
-project = u'NVIDIA TensorRT Inference Server'
-copyright = u'2018, NVIDIA Corporation'
-author = u'NVIDIA Corporation'
-
-version_long = u'0.0.0'
-with open("../VERSION") as f:
- version_long = f.readline()
-version_short = re.match('^[\d]+\.[\d]+', version_long).group(0)
+from docutils import nodes
+from sphinx import search
-git_sha = os.getenv("GIT_SHA")
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
-if not git_sha:
- try:
- git_sha = subprocess.check_output(["git", "log", "--pretty=format:'%h'", "-n1"]).decode('ascii').replace("'","").strip()
- except:
- git_sha = u'0000000'
+# -- Project information -----------------------------------------------------
-git_sha = git_sha[:7] if len(git_sha) > 7 else git_sha
+project = "NVIDIA Triton Inference Server"
+copyright = "2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved"
+author = "NVIDIA"
-version = str(version_long + u"-" + git_sha)
# The full version, including alpha/beta/rc tags
-release = str(version_long)
+# Env only set during riva-release process, otherwise keep as dev for all internal builds
+release = os.getenv("TRITON_VERSION", "dev")
-# hack: version is used for html creation, so put the version picker
-# link here as well:
-version = version + """
-Version select: """
+# maintain left-side bar toctrees in `contents` file
+# so it doesn't show up needlessly in the index page
+master_doc = "contents"
# -- General configuration ---------------------------------------------------
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
- 'sphinx.ext.autodoc',
- 'sphinx.ext.mathjax',
- 'sphinx.ext.napoleon',
- 'sphinx.ext.ifconfig',
- 'sphinx.ext.extlinks',
- 'nbsphinx',
- 'breathe',
- 'exhale'
+ "ablog",
+ "myst_parser",
+ "sphinx_copybutton",
+ "sphinx_design",
+ "sphinx-prompt",
+ # "sphinxcontrib.bibtex",
+ "sphinx_tabs.tabs",
+ "sphinx_sitemap",
+ "sphinx.ext.autodoc",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.mathjax",
+ "sphinx.ext.napoleon",
+ "sphinx.ext.ifconfig",
+ "sphinx.ext.extlinks",
]
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
-
-# The master toctree document.
-master_doc = 'index'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path .
-exclude_patterns = [u'build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']
+suppress_warnings = ["myst.domains", "ref.ref", "myst.header"]
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+source_suffix = [".rst", ".md"]
-# Setup the breathe extension
-breathe_projects = {
- "BreatheTRTIS": "./doxyoutput/xml"
-}
-breathe_default_project = "BreatheTRTIS"
-
-# Setup the exhale extension
-exhale_args = {
- # These arguments are required
- "containmentFolder": "./cpp_api",
- "rootFileName": "cpp_api_root.rst",
- "rootFileTitle": "C++ API",
- "doxygenStripFromPath": "..",
- # Suggested optional arguments
- "createTreeView": True,
- # TIP: if using the sphinx-bootstrap-theme, you need
- # "treeViewIsBootstrap": True,
- "exhaleExecutesDoxygen": True,
- "exhaleDoxygenStdin": textwrap.dedent('''
- JAVADOC_AUTOBRIEF = YES
- INPUT = ../src/clients/c++/request.h
- ''')
+autodoc_default_options = {
+ "members": True,
+ "undoc-members": True,
+ "private-members": True,
}
-# Tell sphinx what the primary language being documented is.
-#primary_domain = 'cpp'
+autosummary_generate = True
+autosummary_mock_imports = [
+ "tritonclient.grpc.model_config_pb2",
+ "tritonclient.grpc.service_pb2",
+ "tritonclient.grpc.service_pb2_grpc",
+]
-# Tell sphinx what the pygments highlight language should be.
-highlight_language = 'text'
+napoleon_include_special_with_doc = True
+
+numfig = True
+
+# final location of docs for seo/sitemap
+html_baseurl = (
+ "https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/"
+)
+
+myst_enable_extensions = [
+ "dollarmath",
+ "amsmath",
+ "deflist",
+ # "html_admonition",
+ "html_image",
+ "colon_fence",
+ # "smartquotes",
+ "replacements",
+ # "linkify",
+ "substitution",
+]
+myst_heading_anchors = 5
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["README.md", "examples/README.md", "user_guide/perf_analyzer.md"]
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-html_theme = 'sphinx_rtd_theme'
-html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further. For a list of options available for each theme, see the
-# documentation.
-#
-html_theme_options = {
- 'canonical_url': 'https://docs.nvidia.com/deeplearning/sdk/tensorrt-inference-server-guide/docs/index.html',
- 'collapse_navigation': False,
- 'display_version': True,
- 'logo_only': False,
-}
+html_theme = "sphinx_book_theme"
+html_logo = "_static/nvidia-logo-horiz-rgb-blk-for-screen.png"
+html_title = "NVIDIA Triton Inference Server"
+html_short_title = "Triton"
+html_copy_source = True
+html_sourcelink_suffix = ""
+html_favicon = "_static/nvidia-logo-vert-rgb-blk-for-screen.png"
+html_last_updated_fmt = ""
+html_additional_files = ["index.html"]
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-#html_static_path = ['_static']
-
-# Custom sidebar templates, must be a dictionary that maps document names
-# to template names.
-#
-# The default sidebars (for documents that don't match any pattern) are
-# defined by theme itself. Builtin themes are using these templates by
-# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
-# 'searchbox.html']``.
-#
-# html_sidebars = {}
-
-
-# -- Options for HTMLHelp output ---------------------------------------------
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'NVIDIATRTISdoc'
-
-
-# -- Options for LaTeX output ------------------------------------------------
+html_static_path = ["_static"]
+html_css_files = ["custom.css"]
-latex_elements = {
- # The paper size ('letterpaper' or 'a4paper').
- #
- # 'papersize': 'letterpaper',
-
- # The font size ('10pt', '11pt' or '12pt').
- #
- # 'pointsize': '10pt',
-
- # Additional stuff for the LaTeX preamble.
- #
- # 'preamble': '',
-
- # Latex figure (float) alignment
- #
- # 'figure_align': 'htbp',
+html_theme_options = {
+ "path_to_docs": "docs",
+ # "launch_buttons": {
+ # "binderhub_url": "https://mybinder.org",
+ # "colab_url": "https://colab.research.google.com/",
+ # "deepnote_url": "https://deepnote.com/",
+ # "notebook_interface": "jupyterlab",
+ # "thebe": True,
+ # # "jupyterhub_url": "https://datahub.berkeley.edu", # For testing
+ # },
+ "use_edit_page_button": False,
+ "use_issues_button": True,
+ "use_repository_button": True,
+ "use_download_button": False,
+ "logo_only": False,
+ "show_toc_level": 2,
+ "extra_navbar": "",
+ "extra_footer": "",
+ "repository_url": "https://github.com/triton-inference-server/server",
+ "use_repository_button": True,
}
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-# author, documentclass [howto, manual, or own class]).
-latex_documents = [
- (master_doc, 'NVIDIATRTIS.tex', u'NVIDIA TensorRT Inference Server Documentation',
- u'NVIDIA Corporation', 'manual'),
-]
-
-
-# -- Options for manual page output ------------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
- (master_doc, 'nvidiatrtis', u'NVIDIA TensorRT Inference Server Documentation',
- [author], 1)
-]
+version_short = release
+deploy_ngc_org = "nvidia"
+deploy_ngc_team = "triton"
+myst_substitutions = {
+ "VersionNum": version_short,
+ "deploy_ngc_org_team": f"{deploy_ngc_org}/{deploy_ngc_team}"
+ if deploy_ngc_team
+ else deploy_ngc_org,
+}
-# -- Options for Texinfo output ----------------------------------------------
+def ultimateReplace(app, docname, source):
+ result = source[0]
+ for key in app.config.ultimate_replacements:
+ result = result.replace(key, app.config.ultimate_replacements[key])
+ source[0] = result
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-# dir menu entry, description, category)
-texinfo_documents = [
- (master_doc, 'NVIDIATRTIS', u'NVIDIA TensorRT Inference Server Documentation',
- author, 'NVIDIATRTIS', 'One line description of project.',
- 'Miscellaneous'),
-]
+# this is a necessary hack to allow us to fill in variables that exist in code blocks
+ultimate_replacements = {
+ "{VersionNum}": version_short,
+ "{SamplesVersionNum}": version_short,
+ "{NgcOrgTeam}": f"{deploy_ngc_org}/{deploy_ngc_team}"
+ if deploy_ngc_team
+ else deploy_ngc_org,
+}
-# -- Extension configuration -------------------------------------------------
-extlinks = {'issue': ('https://github.com/NVIDIA/tensorrt-inference-server/issues/%s',
- 'issue '),
- 'fileref': ('https://github.com/NVIDIA/tensorrt-inference-server/tree/' +
- (git_sha if git_sha != u'0000000' else "master") + '/%s', ''),}
+# bibtex_bibfiles = ["references.bib"]
+# To test that style looks good with common bibtex config
+# bibtex_reference_style = "author_year"
+# bibtex_default_style = "plain"
+
+### We currently use Myst: https://myst-nb.readthedocs.io/en/latest/use/execute.html
+nb_execution_mode = "off" # Global execution disable
+# execution_excludepatterns = ['tutorials/tts-python-basics.ipynb'] # Individual notebook disable
+
+
+def setup(app):
+ app.add_config_value("ultimate_replacements", {}, True)
+ app.connect("source-read", ultimateReplace)
+ app.add_js_file("https://js.hcaptcha.com/1/api.js")
+
+ visitor_script = (
+ "//assets.adobedtm.com/5d4962a43b79/c1061d2c5e7b/launch-191c2462b890.min.js"
+ )
+
+ if visitor_script:
+ app.add_js_file(visitor_script)
+
+ # if not os.environ.get("READTHEDOCS") and not os.environ.get("GITHUB_ACTIONS"):
+ # app.add_css_file(
+ # "https://assets.readthedocs.org/static/css/readthedocs-doc-embed.css"
+ # )
+ # app.add_css_file("https://assets.readthedocs.org/static/css/badge_only.css")
+
+ # # Create the dummy data file so we can link it
+ # # ref: https://github.com/readthedocs/readthedocs.org/blob/bc3e147770e5740314a8e8c33fec5d111c850498/readthedocs/core/static-src/core/js/doc-embed/footer.js # noqa: E501
+ # app.add_js_file("rtd-data.js")
+ # app.add_js_file(
+ # "https://assets.readthedocs.org/static/javascript/readthedocs-doc-embed.js",
+ # priority=501,
+ # )
+
+
+# Patch for sphinx.search stemming short terms (i.e. tts -> tt)
+# https://github.com/sphinx-doc/sphinx/blob/4.5.x/sphinx/search/__init__.py#L380
+def sphinxSearchIndexFeed(
+ self, docname: str, filename: str, title: str, doctree: nodes.document
+):
+ """Feed a doctree to the index."""
+ self._titles[docname] = title
+ self._filenames[docname] = filename
+
+ visitor = search.WordCollector(doctree, self.lang)
+ doctree.walk(visitor)
+
+ # memoize self.lang.stem
+ def stem(word: str) -> str:
+ try:
+ return self._stem_cache[word]
+ except KeyError:
+ self._stem_cache[word] = self.lang.stem(word).lower()
+ return self._stem_cache[word]
+
+ _filter = self.lang.word_filter
+
+ for word in visitor.found_title_words:
+ stemmed_word = stem(word)
+ if len(stemmed_word) > 3 and _filter(stemmed_word):
+ self._title_mapping.setdefault(stemmed_word, set()).add(docname)
+ elif _filter(word): # stemmer must not remove words from search index
+ self._title_mapping.setdefault(word.lower(), set()).add(docname)
+
+ for word in visitor.found_words:
+ stemmed_word = stem(word)
+ # again, stemmer must not remove words from search index
+ if len(stemmed_word) <= 3 or not _filter(stemmed_word) and _filter(word):
+ stemmed_word = word.lower()
+ already_indexed = docname in self._title_mapping.get(stemmed_word, set())
+ if _filter(stemmed_word) and not already_indexed:
+ self._mapping.setdefault(stemmed_word, set()).add(docname)
+
+
+search.IndexBuilder.feed = sphinxSearchIndexFeed
diff --git a/docs/contents.md b/docs/contents.md
new file mode 100644
index 0000000000..cf5653340d
--- /dev/null
+++ b/docs/contents.md
@@ -0,0 +1,149 @@
+
+
+```{toctree}
+:maxdepth: 1
+:caption: Getting Started
+
+getting_started/quickstart
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: User Guide
+
+user_guide/performance_tuning
+user_guide/architecture
+user_guide/model_repository
+customization_guide/repository_agents
+user_guide/model_configuration
+user_guide/request_cancellation
+user_guide/optimization
+user_guide/ragged_batching
+user_guide/rate_limiter
+user_guide/model_analyzer
+user_guide/model_management
+user_guide/custom_operations
+user_guide/decoupled_models
+user_guide/response_cache
+user_guide/metrics
+user_guide/trace
+user_guide/jetson
+user_guide/v1_to_v2
+customization_guide/deploy
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: Debugging
+
+user_guide/debugging_guide
+user_guide/faq
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: Protocol Guides
+
+protocol/README
+customization_guide/inference_protocols
+protocol/extension_binary_data
+protocol/extension_classification
+protocol/extension_generate
+protocol/extension_logging
+protocol/extension_model_configuration
+protocol/extension_model_repository
+protocol/extension_schedule_policy
+protocol/extension_sequence
+protocol/extension_shared_memory
+protocol/extension_statistics
+protocol/extension_trace
+protocol/extension_parameters
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: Customization Guide
+
+customization_guide/build
+customization_guide/compose
+customization_guide/test
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: Examples
+
+examples/jetson/README
+examples/jetson/concurrency_and_dynamic_batching/README
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: Client
+
+client/README
+_reference/tritonclient_api.rst
+client/src/java/README
+client/src/grpc_generated/go/README
+client/src/grpc_generated/javascript/README
+client/src/grpc_generated/java/README
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: Performance Analyzer
+
+client/src/c++/perf_analyzer/README
+client/src/c++/perf_analyzer/docs/README
+client/src/c++/perf_analyzer/docs/install
+client/src/c++/perf_analyzer/docs/quick_start
+client/src/c++/perf_analyzer/docs/cli
+client/src/c++/perf_analyzer/docs/inference_load_modes
+client/src/c++/perf_analyzer/docs/input_data
+client/src/c++/perf_analyzer/docs/measurements_metrics
+client/src/c++/perf_analyzer/docs/benchmarking
+client/src/c++/perf_analyzer/genai-perf/README
+client/src/c++/perf_analyzer/genai-perf/examples/tutorial
+```
+
+```{toctree}
+:maxdepth: 1
+:caption: Python Backend
+
+python_backend/README
+python_backend/inferentia/README
+python_backend/examples/auto_complete/README
+python_backend/examples/bls/README
+python_backend/examples/bls_decoupled/README
+python_backend/examples/custom_metrics/README
+python_backend/examples/decoupled/README
+python_backend/examples/instance_kind/README
+python_backend/examples/jax/README
+python_backend/examples/preprocessing/README
+```
diff --git a/docs/contribute.rst b/docs/contribute.rst
deleted file mode 100644
index edc1bc0e3a..0000000000
--- a/docs/contribute.rst
+++ /dev/null
@@ -1,45 +0,0 @@
-..
- # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions
- # are met:
- # * Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- # * Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- # * Neither the name of NVIDIA CORPORATION nor the names of its
- # contributors may be used to endorse or promote products derived
- # from this software without specific prior written permission.
- #
- # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-Contributing
-============
-
-Contributions to TensorRT Inference Server are more than welcome. To
-contribute make a pull request and follow the guidelines outlined in
-the `CONTRIBUTING
-`_
-document.
-
-Coding Convention
------------------
-
-Use clang-format to format all source files (\*.h, \*.cc, \*.proto) to
-a consistent format. You should run clang-format on all source files
-before submitting a pull request::
-
- $ apt-get install clang-format clang-format-6.0
- $ clang-format-6.0 --style=file -i *.proto *.cc *.h
diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md
new file mode 100644
index 0000000000..2f8b8f69d4
--- /dev/null
+++ b/docs/customization_guide/build.md
@@ -0,0 +1,521 @@
+
+
+# Building Triton
+
+This section describes how to build the Triton server from source. For
+information on building the Triton client libraries and examples see
+[Client Libraries and
+Examples](https://github.com/triton-inference-server/client). For
+information on building the Triton SDK container see [Build SDK
+Image](test.md#build-sdk-image). For information on testing your
+Triton build see [Testing Triton](test.md).
+
+You can create a customized Triton Docker image that contains a subset
+of the released backends without building from source. For example,
+you may want a Triton image that contains only the TensorRT and Python
+backends. For this type of customization you don't need to build
+Triton from source and instead can use [the *compose*
+utility](compose.md).
+
+The Triton source is distributed across multiple GitHub repositories
+that together can be built and installed to create a complete Triton
+installation. Triton server is built using CMake and (optionally)
+Docker. To simplify the build process, Triton provides a
+[build.py](https://github.com/triton-inference-server/server/blob/main/build.py) script.
+The build.py script will generate the CMake and Docker build steps required to
+build Triton, and will optionally invoke those steps or leave the invocation to
+you, as described below.
+
+The build.py script currently supports building Triton for the
+following platforms. See [Building on Unsupported
+Platforms](#building-on-unsupported-platforms) if you are attempting
+to build Triton on a platform that is not listed here.
+
+* [Ubuntu 22.04, x86-64](#building-for-ubuntu-2204)
+
+* [Jetpack 4.x, NVIDIA Jetson (Xavier, Nano, TX2)](#building-for-jetpack-4x)
+
+* [Windows 10, x86-64](#building-for-windows-10)
+
+If you are developing or debugging Triton, see [Development and
+Incremental Builds](#development-and-incremental-builds) for information
+on how to perform incremental builds.
+
+## Building for Ubuntu 22.04
+
+For Ubuntu-22.04, build.py supports both a Docker build and a
+non-Docker build.
+
+* [Build using Docker](#building-with-docker) and the TensorFlow and PyTorch
+ Docker images from [NVIDIA GPU Cloud (NGC)](https://ngc.nvidia.com).
+
+* [Build without Docker](#building-without-docker).
+
+### Building With Docker
+
+The easiest way to build Triton is to use Docker. The result of the
+build will be a Docker image called *tritonserver* that will contain
+the tritonserver executable in /opt/tritonserver/bin and the required
+shared libraries in /opt/tritonserver/lib. The backends and
+repository-agents built for Triton will be in
+/opt/tritonserver/backends and /opt/tritonserver/repoagents,
+respectively.
+
+The first step for the build is to clone the
+[triton-inference-server/server](https://github.com/triton-inference-server/server)
+repo branch for the release you are interested in building (or the
+*main* branch to build from the development branch). Then run build.py
+as described below. The build.py script performs these steps when
+building with Docker.
+
+* In the *build* subdirectory of the server repo, generate the
+ docker_build script, the cmake_build script and the Dockerfiles
+ needed to build Triton. If you use the --dryrun flag, build.py will
+ stop here so that you can examine these files.
+
+* Run the docker_build script to perform the Docker-based build. The
+ docker_build script performs the following steps.
+
+ * Build the *tritonserver_buildbase* Docker image that collects all
+ the build dependencies needed to build Triton. The
+ *tritonserver_buildbase* image is based on a minimal/base
+ image. When building with GPU support (--enable-gpu), the *min*
+ image is the
+ [\<xx.yy\>-py3-min](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)
+ image pulled from [NGC](https://ngc.nvidia.com) that contains the
+ CUDA, cuDNN, TensorRT and other dependencies that are required to
+ build Triton. When building without GPU support, the *min* image
+ is the standard ubuntu:22.04 image.
+
+ * Run the cmake_build script within the *tritonserver_buildbase*
+ image to actually build Triton. The cmake_build script performs
+ the following steps.
+
+ * Invoke CMake in the server repo to build Triton's core shared
+ library and *tritonserver* executable.
+
+ * Clone each requested backend and build it using CMake. For
+ example, the ONNX Runtime backend is built using
+ [triton-inference-server/onnxruntime_backend/CMakeLists.txt](https://github.com/triton-inference-server/onnxruntime_backend/blob/main/CMakeLists.txt). Some
+ of the backends may use Docker as part of their build (for
+ example [ONNX
+ Runtime](https://github.com/triton-inference-server/onnxruntime_backend)
+ and
+ [OpenVINO](https://github.com/triton-inference-server/openvino_backend)). If
+ you don't want to use Docker in those cases you must consult the
+ build process for those backends.
+
+ * Clone each repository agent and build it using the CMake file
+ from the corresponding repo. For example, the
+ [Checksum](https://github.com/triton-inference-server/checksum_repository_agent)
+ repository agent is built using
+ [triton-inference-server/checksum_repository_agent/CMakeLists.txt](https://github.com/triton-inference-server/checksum_repository_agent/blob/main/CMakeLists.txt).
+
+ * Copy the built artifacts out of the container and into the build
+ subdirectory on the host system.
+
+ * Create the final *tritonserver* Docker image that contains the
+ libraries, executables and other artifacts from the build.
+
+ * Create a *tritonserver_cibase* Docker image that contains the QA
+ artifacts needed for testing, as described in [Testing
+ Triton](test.md).
+
+By default, build.py does not enable any of Triton's optional features
+but you can enable all features, backends, and repository agents with
+the --enable-all flag. The -v flag turns on verbose output.
+
+```bash
+$ ./build.py -v --enable-all
+```
+
+If you want to enable only certain Triton features, backends and
+repository agents, do not specify --enable-all. Instead you must
+specify the individual flags as documented by --help.
+
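+For example, a hypothetical reduced build that keeps logging, statistics,
+tracing, GPU support, both endpoints, and a small set of backends might look
+like the following sketch; the flag names come from `./build.py --help`, and
+the selection shown is illustrative rather than a recommendation.
+
+```bash
+# Illustrative subset build -- adjust the feature, backend and repoagent
+# selection to match your deployment.
+./build.py -v \
+    --enable-logging --enable-stats --enable-tracing --enable-gpu \
+    --endpoint=http --endpoint=grpc \
+    --backend=ensemble --backend=tensorrt --backend=python \
+    --repoagent=checksum
+```
+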
+#### Building With Specific GitHub Branches
+
+As described above, the build is performed in the server repo, but
+source from several other repos is fetched during the build
+process. Typically you do not need to specify anything about these
+other repos, but if you want to control which branch is used in these
+other repos you can as shown in the following example.
+
+```bash
+$ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container tag> --repo-tag=backend:<container tag> --repo-tag=thirdparty:<container tag> ... --backend=tensorrt:<container tag> ... --repoagent=checksum:<container tag> ...
+```
+
+If you are building on a release branch then `<container tag>` will
+default to the branch name. For example, if you are building on the
+r24.03 branch, `<container tag>` will default to r24.03. If you are
+building on any other branch (including the *main* branch) then
+`<container tag>` will default to "main". Therefore, you typically do
+not need to provide `<container tag>` at all (nor the preceding
+colon). You can use a different `<container tag>` for a component to
+instead use the corresponding branch/tag in the build. For example, if
+you have a branch called "mybranch" in the
+[onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend)
+repo that you want to use in the build, you would specify
+--backend=onnxruntime:mybranch.
+
+#### CPU-Only Build
+
+If you want to build without GPU support you must specify individual
+feature flags and not include the `--enable-gpu` and
+`--enable-gpu-metrics` flags. Only the following backends are
+available for a non-GPU / CPU-only build: `identity`, `repeat`, `ensemble`,
+`square`, `tensorflow2`, `pytorch`, `onnxruntime`, `openvino`,
+`python` and `fil`.
+
+To include the TensorFlow2 backend in your CPU-only build, you must
+provide this additional flag to build.py:
+`--extra-backend-cmake-arg=tensorflow2:TRITON_TENSORFLOW_INSTALL_EXTRA_DEPS=ON`.
+
+CPU-only builds of the TensorFlow and PyTorch backends require some CUDA stubs
+and runtime dependencies that are not present in the CPU-only base container.
+These are retrieved from a GPU base container, which can be changed with the
+`--image=gpu-base,nvcr.io/nvidia/tritonserver:<xx.yy>-py3-min` flag.
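+
+As a rough sketch (the backend selection is illustrative and drawn from the
+CPU-only list above), such a build might be invoked as:
+
+```bash
+# Hypothetical CPU-only build: note the absence of --enable-gpu and
+# --enable-gpu-metrics.
+./build.py -v \
+    --enable-logging --enable-stats \
+    --endpoint=http --endpoint=grpc \
+    --backend=ensemble --backend=onnxruntime --backend=openvino --backend=python
+```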
+
+### Building Without Docker
+
+To build Triton without using Docker you must install the build
+dependencies that are handled automatically when building with Docker.
+
+The first step for the build is to clone the
+[triton-inference-server/server](https://github.com/triton-inference-server/server)
+repo branch for the release you are interested in building (or the
+*main* branch to build from the development branch).
+
+To determine what dependencies are required by the build, run build.py
+with the --dryrun flag, and then look in the build subdirectory at
+Dockerfile.buildbase.
+
+```bash
+$ ./build.py -v --enable-all --dryrun
+```
+
+From Dockerfile.buildbase you can see what dependencies you need to
+install on your host system. Note that when building with --enable-gpu
+(or --enable-all), Dockerfile.buildbase depends on the
+[\<xx.yy\>-py3-min](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)
+image pulled from [NGC](https://ngc.nvidia.com). Unfortunately, a
+Dockerfile is not currently available for the
+[\<xx.yy\>-py3-min](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)
+image. Instead, you must manually install [CUDA and
+cuDNN](#cuda-cublas-cudnn) and [TensorRT](#tensorrt) dependencies as
+described below.
+
+Once you have installed these dependencies on your build system you
+can then use build.py with the --no-container-build flag to build
+Triton.
+
+```bash
+$ ./build.py -v --no-container-build --build-dir=`pwd`/build --enable-all
+```
+
+See [Building with Docker](#building-with-docker) for more details on how the
+cmake_build script is used to perform the build.
+
+#### CUDA, cuBLAS, cuDNN
+
+For Triton to support NVIDIA GPUs you must install CUDA, cuBLAS and
+cuDNN. These libraries must be installed on the system include and
+library paths so that they are available for the build. The version of
+the libraries used for a given release can be found in the [Framework
+Containers Support
+Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html).
+
+For a given version of Triton you can attempt to build with
+non-supported versions of the libraries but you may have build or
+execution issues since non-supported versions are not tested.
+
+#### TensorRT
+
+The TensorRT headers and libraries must be installed on system include
+and library paths so that they are available for the build. The
+version of TensorRT used in a given release can be found in the
+[Framework Containers Support
+Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html).
+
+For a given version of Triton you can attempt to build with
+non-supported versions of TensorRT but you may have build or execution
+issues since non-supported versions are not tested.
+
+## Building for JetPack 4.x
+
+*Under Construction*
+
+## Building for Windows 10
+
+For Windows 10, build.py supports both a Docker build and a non-Docker
+build in a similar way as described for [Ubuntu](#building-for-ubuntu-2204). The primary
+difference is that the minimal/base image used as the base of the
+Dockerfile.buildbase image can be built from the provided
+[Dockerfile.win10.min](https://github.com/triton-inference-server/server/blob/main/Dockerfile.win10.min)
+file as described in [Windows 10 "Min" Image](#windows-10-min-image). When running build.py
+use the --image flag to specify the tag that you assigned to this
+image. For example, --image=base,win10-py3-min.
+
+### Windows and Docker
+
+Depending on your version of Windows 10 and your version of Docker you
+may need to perform these additional steps before any of the following
+steps.
+
+* Set your Docker to work with "Windows containers". Right click on
+ the whale icon in the lower-right status area and select "Switch to
+ Windows containers".
+
+### Windows 10 "Min" Image
+
+The "min" container describes the base dependencies needed to perform
+the Windows build. The Windows min container is
+[Dockerfile.win10.min](https://github.com/triton-inference-server/server/blob/main/Dockerfile.win10.min).
+
+Before building the min container you must download the appropriate
+cuDNN and TensorRT versions and place them in the same directory as
+Dockerfile.win10.min.
+
+* For cuDNN the CUDNN_VERSION and CUDNN_ZIP arguments defined in
+ Dockerfile.win10.min indicate the version of cuDNN that you should
+ download from https://developer.nvidia.com/rdp/cudnn-download.
+
+* For TensorRT the TENSORRT_VERSION and TENSORRT_ZIP arguments defined
+ in Dockerfile.win10.min indicate the version of TensorRT that you
+ should download from
+ https://developer.nvidia.com/nvidia-tensorrt-download.
+
+After downloading the zip files for cuDNN and TensorRT, you build the
+min container using the following command.
+
+```bash
+$ docker build -t win10-py3-min -f Dockerfile.win10.min .
+```
+
+### Build Triton Server
+
+Triton is built using the build.py script. The build system must have
+Docker, Python3 (with the *docker* module installed via pip) and git
+installed so that it can execute build.py and perform a Docker build. By
+default, build.py does not enable any of Triton's optional features
+and so you must enable them explicitly. The following build.py
+invocation builds all features and backends available on Windows.
+
+```bash
+python build.py --cmake-dir=<path/to/repo>/build --build-dir=/tmp/citritonbuild --no-container-pull --image=base,win10-py3-min --enable-logging --enable-stats --enable-tracing --enable-gpu --endpoint=grpc --endpoint=http --repo-tag=common:<container tag> --repo-tag=core:<container tag> --repo-tag=backend:<container tag> --repo-tag=thirdparty:<container tag> --backend=ensemble --backend=tensorrt:<container tag> --backend=onnxruntime:<container tag> --backend=openvino:<container tag>
+```
+
+If you are building on *main* branch then '<container tag>' will
+default to "main". If you are building on a release branch then
+'<container tag>' will default to the branch name. For example, if you
+are building on the r24.03 branch, '<container tag>' will default to
+r24.03. Therefore, you typically do not need to provide '<container
+tag>' at all (nor the preceding colon). You can use a different
+'<container tag>' for a component to instead use the corresponding
+branch/tag in the build. For example, if you have a branch called
+"mybranch" in the
+[onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend)
+repo that you want to use in the build, you would specify
+--backend=onnxruntime:mybranch.
+
+### Extract Build Artifacts
+
+When build.py completes, a Docker image called *tritonserver* will
+contain the built Triton Server executable, libraries and other
+artifacts. Windows containers do not support GPU access so you likely
+want to extract the necessary files from the tritonserver image and
+run them directly on your host system. All the Triton artifacts can be
+found in /opt/tritonserver directory of the tritonserver image. Your
+host system will need to install the CUDA, cuDNN, TensorRT and other
+dependencies that were used for the build.
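+
+One possible way to copy the artifacts out of the image is with
+`docker create` and `docker cp`; the container name and destination
+directory below are arbitrary.
+
+```bash
+# Create a stopped container from the built image and copy the install tree out.
+docker create --name triton_extract tritonserver
+docker cp triton_extract:/opt/tritonserver ./tritonserver_install
+docker rm triton_extract
+```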
+
+## Building on Unsupported Platforms
+
+Building for an unsupported OS and/or hardware platform is
+possible. All of the build scripting, Dockerfiles and CMake
+invocations are included in the public repos or are generated by
+build.py as described in [Building with Docker](#building-with-docker). From
+these files you can find the required dependencies and CMake
+invocations. However, due to differences in compilers, libraries,
+package management, etc. you may have to make changes in the build
+scripts, Dockerfiles, CMake files and the source code.
+
+To see the generated build scripts and Dockerfiles referred to below,
+use:
+
+```bash
+$ ./build.py -v --enable-all --dryrun
+```
+
+You should familiarize yourself with the build process for supported
+platforms by reading the above documentation and then follow the
+process for the supported platform that most closely matches the
+platform you are interested in (for example, if you are trying to
+build for RHEL/x86-64 then follow the [Building for Ubuntu
+22.04](#building-for-ubuntu-2204) process). You will likely need to
+make changes in the following areas and then manually run docker_build
+and cmake_build or the equivalent commands to perform a build.
+
+* The generated Dockerfiles install dependencies for the build using
+ platform-specific packaging tools, for example, apt-get for
+ Ubuntu. You will need to change build.py to use the packaging tool
+ appropriate for your platform.
+
+* The package and library names for your platform may differ from
+ those used by the generated Dockerfiles. You will need to find the
+ corresponding packages and libraries on your platform.
+
+* Your platform may use a different compiler or compiler version than
+ the supported platforms. As a result you may encounter build errors
+ that need to be fixed by editing the source code or changing the
+ compilation flags.
+
+* Triton depends on a large number of open-source packages that it
+ builds from source. If one of these packages does not support your
+ platform then you may need to disable the Triton feature that
+ depends on that package. For example, Triton supports the S3
+ filesystem by building the aws-sdk-cpp package. If aws-sdk-cpp
+ doesn't build for your platform then you can remove the need for
+ that package by not specifying --filesystem=s3 when you run
+ build.py. In general, you should start by running build.py with the
+ minimal required feature set.
+
+* The
+ [TensorFlow](https://github.com/triton-inference-server/tensorflow_backend)
+ backend extracts pre-built shared libraries from the TensorFlow NGC
+ container as part of the build. This container is only available for
+ Ubuntu-22.04 / x86-64, so if you require the TensorFlow backend for
+ your platform you will need to download the TensorFlow container and
+ modify its build to produce shared libraries for your platform. You
+ must use the TensorFlow source and build scripts from within the NGC
+ container because they contain Triton-specific patches that are
+ required for the Triton TensorFlow backend.
+
+* By default, the
+ [PyTorch](https://github.com/triton-inference-server/pytorch_backend)
+ backend build extracts pre-built shared libraries from the PyTorch
+ NGC container. But the build can also use PyTorch shared libraries
+ that you build separately for your platform. See the pytorch_backend
+ build process for details.
+
+## Development and Incremental Builds
+
+### Development Builds Without Docker
+
+If you are [building without Docker](#building-without-docker), use the
+CMake invocation steps in cmake_build to set up a build environment
+where you can invoke make/msbuild.exe to incrementally build the
+Triton core, a backend, or a repository agent.
+
+### Development Builds With Docker
+
+If you are [building with Docker](#building-with-docker), the generated
+*tritonserver_buildbase* image contains all the dependencies needed to
+perform a full or incremental build. Within *tritonserver_buildbase*,
+/workspace/build/cmake_build contains the CMake invocations that are
+used to build the Triton core, the backends, and the repository
+agents.
+
+To perform an incremental build within the *tritonserver_buildbase*
+container, map your source into the container and then run the
+appropriate CMake and `make` (or `msbuild.exe`) steps from cmake_build
+within the container.
+
+#### Development Build of Triton Core
+
+Assume you have a clone of the [server
+repo](https://github.com/triton-inference-server/server) on your host
+system where you are making changes, and you want to perform
+incremental builds to test those changes. Your source code is in
+/home/me/server. Run the *tritonserver_buildbase* container and map
+your server source directory into the container at /server.
+
+```
+$ docker run -it --rm -v/home/me/server:/server tritonserver_buildbase bash
+```
+
+Look at /workspace/build/cmake_build within the container for the
+section of commands that build "Triton core library". You can follow
+those commands exactly, or you can modify them to change the build
+directory or the CMake options. You **must** change the CMake command
+to use /server instead of /workspace as the location for the
+CMakeLists.txt file and source:
+
+```
+$ cmake /server
+```
+
+Then you can change directory into the build directory and run `make`
+(or `msbuild.exe`) as shown in cmake_build. As you make changes to the
+source on your host system, you can perform incremental builds by
+re-running `make` (or `msbuild.exe`).
+
+#### Development Build of Backend or Repository Agent
+
+Performing a full or incremental build of a backend or repository
+agent is similar to building the Triton core. As an example we will
+use the TensorRT backend. Assume you have a clone of the [TensorRT
+backend
+repo](https://github.com/triton-inference-server/tensorrt_backend) on
+your host system where you are making changes, and you want to perform
+incremental builds to test those changes. Your source code is in
+/home/me/tensorrt_backend. Run the *tritonserver_buildbase*
+container and map your TensorRT backend source directory into the
+container at /tensorrt_backend. Note that some backends will use
+Docker as part of their build, and so the host's Docker registry must
+be made available within the *tritonserver_buildbase* by mounting
+docker.sock (on Windows use
+-v\\.\pipe\docker_engine:\\.\pipe\docker_engine).
+
+```
+$ docker run -it --rm -v/var/run/docker.sock:/var/run/docker.sock -v/home/me/tensorrt_backend:/tensorrt_backend tritonserver_buildbase bash
+```
+
+Look at /workspace/build/cmake_build within the container for the
+section of commands that build "TensorRT backend". You can follow
+those commands exactly, or you can modify them to change the build
+directory or the CMake options. You **must** change the CMake command
+to use /tensorrt_backend instead of /workspace as the location for the
+CMakeLists.txt file and source:
+
+```
+$ cmake /tensorrt_backend
+```
+
+Then you can change directory into the build directory and run `make`
+(or `msbuild.exe`) as shown in cmake_build. As you make changes to the
+source on your host system, you can perform incremental builds by
+re-running `make` (or `msbuild.exe`).
+
+### Building with Debug Symbols
+
+To build with Debug symbols, use the --build-type=Debug argument while
+launching build.py. If building directly with CMake use
+-DCMAKE_BUILD_TYPE=Debug. You can then launch the built server with
+gdb and see the debug symbols/information in the gdb trace.
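+
+For example, a debug build through build.py, or an equivalent direct CMake
+configuration for the incremental builds described above, might look like the
+following sketch (paths and parallelism are illustrative):
+
+```bash
+# Debug build via build.py.
+./build.py -v --enable-all --build-type=Debug
+
+# Or, when invoking CMake directly inside the build container:
+cmake -DCMAKE_BUILD_TYPE=Debug /server
+make -j8
+
+# Launch the resulting server under gdb to use the debug information.
+gdb --args tritonserver --model-repository=/models
+```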
diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md
new file mode 100644
index 0000000000..3baa9e9df6
--- /dev/null
+++ b/docs/customization_guide/compose.md
@@ -0,0 +1,147 @@
+
+
+# Customize Triton Container
+
+Two Docker images are available from [NVIDIA GPU Cloud
+(NGC)](https://ngc.nvidia.com) that make it possible to easily
+construct customized versions of Triton. By customizing Triton you can
+significantly reduce the size of the Triton image by removing
+functionality that you don't require.
+
+Currently the customization is limited as described below but future
+releases will increase the amount of customization that is available.
+It is also possible to [build Triton](build.md#building-triton)
+from source to get more exact customization.
+
+## Use the compose.py script
+
+The `compose.py` script can be found in the
+[server repository](https://github.com/triton-inference-server/server).
+Simply clone the repository and run `compose.py` to create a custom container.
+Note: The created container version will depend on the branch that was cloned.
+For example, branch
+ [r24.03](https://github.com/triton-inference-server/server/tree/r24.03)
+should be used to create an image based on the NGC 24.03 Triton release.
+
+`compose.py` provides `--backend` and `--repoagent` options that allow you to
+specify which backends and repository agents to include in the custom image.
+For example, the following creates a new Docker image that
+contains only the PyTorch and TensorFlow backends and the checksum
+repository agent.
+
+Example:
+```
+python3 compose.py --backend pytorch --backend tensorflow --repoagent checksum
+```
+will provide an image tagged `tritonserver` locally. You can run a container
+from it with
+```
+$ docker run -it tritonserver:latest
+```
+
+Note: If `compose.py` is run on release versions `r21.08` and earlier,
+the resulting container will have DCGM version 2.2.3 installed.
+This may result in different GPU statistic reporting behavior.
+
+### Compose a specific version of Triton
+
+`compose.py` requires two containers: a `min` container, which is the
+base the composed container is built from, and a `full` container, from which
+the script will extract components. The versions of the `min` and `full`
+containers are determined by the branch of Triton that `compose.py` is on.
+For example, running
+```
+python3 compose.py --backend pytorch --repoagent checksum
+```
+on branch [r24.03](https://github.com/triton-inference-server/server/tree/r24.03) pulls:
+- `min` container `nvcr.io/nvidia/tritonserver:24.03-py3-min`
+- `full` container `nvcr.io/nvidia/tritonserver:24.03-py3`
+
+Alternatively, users can specify the version of Triton container to pull from
+any branch by either:
+1. Adding the flag `--container-version <container version>` to the command:
+```
+python3 compose.py --backend pytorch --repoagent checksum --container-version 24.03
+```
+2. Specifying `--image min,<min container image> --image full,<full container image>`.
+ The user is responsible for specifying compatible `min` and `full` containers.
+```
+python3 compose.py --backend pytorch --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:24.03-py3-min --image full,nvcr.io/nvidia/tritonserver:24.03-py3
+```
+Methods 1 and 2 will result in the same composed container. Furthermore,
+`--image` flag overrides the `--container-version` flag when both are specified.
+
+Note:
+1. All contents in the `/opt/tritonserver` directory of the `min` image will be
+ removed to ensure dependencies of the composed image are added properly.
+2. vLLM and TensorRT-LLM backends are currently not supported backends for
+`compose.py`. If you want to build additional backends on top of these backends,
+it would be better to [build it yourself](#build-it-yourself) by using
+`nvcr.io/nvidia/tritonserver:24.03-vllm-python-py3` or
+`nvcr.io/nvidia/tritonserver:24.03-trtllm-python-py3` as a `min` container.
+
+
+### CPU-only container composition
+
+CPU-only containers are not yet available for customization. Please see
+ [build documentation](build.md) for instructions to build a full CPU-only
+ container. When including TensorFlow or PyTorch backends in the composed
+ container, an additional `gpu-min` container is needed
+since this container provides the CUDA stubs and runtime dependencies which are
+not provided in the CPU-only min container.
+
+## Build it yourself
+
+If you would like to do what `compose.py` is doing under the hood yourself, you
+ can run `compose.py` with the `--dry-run` option and then modify the
+ `Dockerfile.compose` file to satisfy your needs.
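+
+For example, a dry run that only generates `Dockerfile.compose` for later
+editing might look like this (the backend and repository agent shown are
+illustrative):
+
+```
+python3 compose.py --backend pytorch --repoagent checksum --dry-run
+```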
+
+
+### Triton with Unsupported and Custom Backends
+
+You can [create and build your own Triton
+backend](https://github.com/triton-inference-server/backend). The
+result of that build should be a directory containing your backend
+shared library and any additional files required by the
+backend. Assuming your backend is called "mybackend" and that the
+directory is "./mybackend", adding the following to the Dockerfile `compose.py`
+created will create a Triton image that contains all the supported Triton
+backends plus your custom backend.
+
+```
+COPY ./mybackend /opt/tritonserver/backends/mybackend
+```
+
+You also need to install any additional dependencies required by your
+backend as part of the Dockerfile. Then use Docker to create the
+image.
+
+```
+$ docker build -t tritonserver_custom -f Dockerfile.compose .
+```
diff --git a/docs/customization_guide/deploy.md b/docs/customization_guide/deploy.md
new file mode 100644
index 0000000000..112a2cebcf
--- /dev/null
+++ b/docs/customization_guide/deploy.md
@@ -0,0 +1,279 @@
+
+
+# Secure Deployment Considerations
+
+The Triton Inference Server project is designed for flexibility and
+allows developers to create and deploy inferencing solutions in a
+variety of ways. Developers can deploy Triton as an http server, a
+grpc server, a server supporting both, or embed a Triton server into
+their own application. Developers can deploy Triton locally or in the
+cloud, within a Kubernetes cluster behind an API gateway or as a
+standalone process. This guide is intended to provide some key points
+and best practices that users deploying Triton based solutions should
+consider.
+
+| [Deploying Behind a Secure Gateway or Proxy](#deploying-behind-a-secure-proxy-or-gateway) | [Running with Least Privilege](#running-with-least-privilege) |
+
+> [!IMPORTANT]
+> Ultimately the security of a solution based on Triton
+> is the responsibility of the developer building and deploying that
+> solution. When deploying in production settings please have security
+> experts review any potential risks and threats.
+
+> [!WARNING]
+> Dynamic updates to model repositories are disabled by
+> default. Enabling dynamic updates to model repositories either
+> through model loading APIs or through directory polling can lead to
+> arbitrary code execution. Model repository access control is
+> critical in production deployments. If dynamic updates are required,
+> ensure only trusted entities have access to model loading APIs and
+> model repository directories.
+
+## Deploying Behind a Secure Proxy or Gateway
+
+The Triton Inference Server is designed primarily as a microservice to
+be deployed as part of a larger solution within an application
+framework or service mesh.
+
+In such deployments it is typical to utilize dedicated gateway or
+proxy servers to handle authorization, access control, resource
+management, encryption, load balancing, redundancy and many other
+security and availability features.
+
+The full design of such systems is outside the scope of this
+deployment guide but in such scenarios dedicated ingress controllers
+handle access from outside the trusted network while Triton Inference
+Server handles only trusted, validated requests.
+
+In such scenarios Triton Inference Server is not exposed directly to
+an untrusted network.
+
+### References on Secure Deployments
+
+In the following references, Triton Inference Server would be deployed
+as an "Application" or "Service" within the trusted internal network.
+
+* [https://www.nginx.com/blog/architecting-zero-trust-security-for-kubernetes-apps-with-nginx/]
+* [https://istio.io/latest/docs/concepts/security/]
+* [https://konghq.com/blog/enterprise/envoy-service-mesh]
+* [https://www.solo.io/topics/envoy-proxy/]
+
+## Running with Least Privilege
+
+ The security principle of least privilege advocates that a process be
+ granted the minimum permissions required to do its job.
+
+ For an inference solution based on Triton Inference Server there are a
+ number of ways to reduce security risks by limiting the permissions
+ and capabilities of the server to the minimum required for correct
+ operation.
+
+### 1. Follow Best Practices for Securing Kubernetes Deployments
+
+ When deploying Triton within a Kubernetes pod ensure that it is
+ running with a service account with the fewest possible
+ permissions. Ensure that you have configured [role based access
+ control](https://kubernetes.io/docs/reference/access-authn-authz/rbac/)
+ to limit access to resources and capabilities as required by your
+ application.
+
+### 2. Follow Best Practices for Launching Standalone Docker Containers
+
+ When Triton is deployed as a containerized service, standard docker
+ security practices apply. This includes limiting the resources that a
+ container has access to as well as limiting network access to the
+ container. https://docs.docker.com/engine/security/
+
+### 3. Run as a Non-Root User
+
+ Triton's pre-built containers contain a non-root user that can be used
+ to launch the tritonserver application with limited permissions. This
+ user, `triton-server` is created with `user id 1000`. When launching
+ the container using docker the user can be set with the `--user`
+ command line option.
+
+##### Example Launch Command
+
+ ```
+ docker run --rm --user triton-server -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:YY.MM-py3 tritonserver --model-repository=/models
+ ```
+
+### 4. Restrict or Disable Access to Protocols and APIs
+
+The pre-built Triton Inference Server application enables a full set
+of features including health checks, server metadata, inference apis,
+shared memory apis, model and model repository configuration,
+statistics, tracing and logging. Care should be taken to only expose
+those capabilities that are required for your solution.
+
+#### Disabling Features at Compile Time
+
+When building a custom inference server application, features can be
+selectively enabled or disabled using the `build.py` script. As an
+example a developer can use the flags `--endpoint http` and
+`--endpoint grpc` to compile support for `http`, `grpc` or
+both. Support for individual backends can be enabled as well. For more
+details please see [documentation](build.md) on building a custom
+inference server application.
+
+#### Disabling / Restricting Features at Run Time
+
+The `tritonserver` application provides a number of command line
+options to enable and disable features when launched. For a full list
+of options please see `tritonserver --help`. The following subset is
+described here with basic recommendations.
+
+##### `--exit-on-error <boolean>, default True`
+
+Exits the inference server if any error occurs during
+initialization. Recommended to set to `True` to catch any
+unanticipated errors.
+
+##### `--disable-auto-complete-config, default enabled`
+
+Disables backends from auto-completing model configuration. If
+auto-completion is not required for your solution, it is recommended to
+disable it so that model configurations are defined statically.
+
+##### `--strict-readiness <boolean>, default True`
+
+If set to true `/v2/health/ready` will only report ready when all
+selected models are loaded. Recommended to set to `True` to provide a
+signal to other services and orchestration frameworks when full
+initialization is complete and server is healthy.
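+
+As an illustration, an external health probe could poll the readiness endpoint
+on the default HTTP port (8000) and treat anything other than a success
+response as not ready:
+
+```
+curl -sf localhost:8000/v2/health/ready && echo "server ready"
+```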
+
+##### `--model-control-mode , default "none"`
+
+Specifies the mode for model management.
+
+> [!WARNING]
+> Allowing dynamic updates to the model repository can lead
+> to arbitrary code execution. Model repository access control is
+> critical in production deployments. Unless required for operation, it's recommended
+> to disable dynamic updates. If required, please ensure only trusted entities
+> can add or remove models from a model repository.
+
+Options:
+
+ * `none`- Models are loaded at start up and can not be modified.
+ * `poll`- Server process will poll the model repository for changes.
+ * `explicit` - Models can be loaded and unloaded via the model control APIs.
+
+Recommended to set to `none` unless dynamic updates are required. If
+dynamic updates are required care must be taken to control access to
+the model repository files and load and unload APIs.
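+
+For example, a launch using explicit mode that pre-loads a single model at
+startup might look like the following sketch (the model name is a placeholder;
+see `tritonserver --help` for `--load-model`):
+
+```
+tritonserver --model-repository=/models \
+    --model-control-mode=explicit \
+    --load-model=<model-name>
+```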
+
+##### `--allow-http <boolean>, default True`
+
+Enable HTTP request handling. Recommended to set to `False` if not required.
+
+##### `--allow-grpc <boolean>, default True`
+
+Enable gRPC request handling. Recommended to set to `False` if not required.
+
+##### `--grpc-use-ssl <boolean> default False`
+
+Use SSL authentication for gRPC requests. Recommended to set to `True` if service is not protected by a gateway or proxy.
+
+##### `--grpc-use-ssl-mutual <boolean> default False`
+
+Use mutual SSL authentication for gRPC requests. Recommended to set to `True` if service is not protected by a gateway or proxy.
+
+##### `--grpc-restricted-protocol <<string>:<string>=<string>>`
+
+Restrict access to specific gRPC protocol categories to users with
+specific key, value pair shared secret. See
+[limit-endpoint-access](inference_protocols.md#limit-endpoint-access-beta)
+for more information.
+
+> [!Note]
+> Restricting access can be used to limit exposure to model
+> control APIs to trusted users.
+
+##### `--http-restricted-api <<string>:<string>=<string>>`
+
+Restrict access to specific HTTP API categories to users with
+specific key, value pair shared secret. See
+[limit-endpoint-access](inference_protocols.md#limit-endpoint-access-beta)
+for more information.
+
+> [!Note]
+> Restricting access can be used to limit exposure to model
+> control APIs to trusted users.
+
+##### `--allow-sagemaker <boolean> default False`
+
+Enable Sagemaker request handling. Recommended to set to `False` unless required.
+
+##### `--allow-vertex-ai <boolean> default depends on environment variable`
+
+Enable Vertex AI request handling. Default is `True` if
+`AIP_MODE=PREDICTION`, `False` otherwise. Recommended to set to
+`False` unless required.
+
+##### `--allow-metrics <boolean> default True`
+
+Allow server to publish prometheus style metrics. Recommended to set
+to `False` if not required to avoid capturing or exposing any sensitive information.
+
+#### `--trace-config level= default "off"`
+
+Tracing mode. Trace mode supports `triton` and `opentelemetry`. Unless required `--trace-config level=off` should be set to avoid capturing or exposing any sensitive information.
+
+
+##### `backend-directory <string> default /opt/tritonserver/backends`
+
+Directory where backend shared libraries are found.
+
+> [!Warning]
+> Access to add or remove files from the backend directory
+> must be access controlled. Adding untrusted files
+> can lead to arbitrary code execution.
+
+##### `repoagent-directory <string> default /opt/tritonserver/repoagents`
+Directory where repository agent shared libraries are found.
+
+> [!Warning]
+> Access to add or remove files from the repoagent directory
+> must be access controlled. Adding untrusted files
+> can lead to arbitrary code execution.
+
+##### `cache-directory <string> default /opt/tritonserver/caches`
+
+Directory where cache shared libraries are found.
+
+> [!Warning]
+> Access to add or remove files from the cache directory
+> must be access controlled. Adding untrusted files
+> can lead to arbitrary code execution.
+
+
+
+
+
diff --git a/docs/customization_guide/inference_protocols.md b/docs/customization_guide/inference_protocols.md
new file mode 100644
index 0000000000..592f26e7d1
--- /dev/null
+++ b/docs/customization_guide/inference_protocols.md
@@ -0,0 +1,506 @@
+
+
+# Inference Protocols and APIs
+
+Clients can communicate with Triton using either an [HTTP/REST
+protocol](#httprest-and-grpc-protocols), a [GRPC
+protocol](#httprest-and-grpc-protocols), or an [in-process C
+API](#in-process-triton-server-api) or its
+[C++ wrapper](https://github.com/triton-inference-server/developer_tools/tree/main/server).
+
+## HTTP/REST and GRPC Protocols
+
+Triton exposes both HTTP/REST and GRPC endpoints based on [standard
+inference
+protocols](https://github.com/kserve/kserve/tree/master/docs/predict-api/v2)
+that have been proposed by the [KServe
+project](https://github.com/kserve). To fully enable all capabilities
+Triton also implements [HTTP/REST and GRPC
+extensions](https://github.com/triton-inference-server/server/tree/main/docs/protocol)
+to the KServe inference protocol. GRPC protocol also provides a
+bi-directional streaming version of the inference RPC to allow a
+sequence of inference requests/responses to be sent over a
+GRPC stream. We typically recommend using the unary version for
+inference requests. The streaming version should be used only if the
+situation demands it. Some such use cases are:
+
+* Assume a system with multiple Triton server instances running
+ behind a Load Balancer. If a sequence of inference requests is
+ needed to hit the same Triton server instance, a GRPC stream
+ will hold a single connection throughout the lifetime and hence
+ ensure the requests are delivered to the same Triton instance.
+* If the order of requests/responses needs to be preserved over
+ the network, a GRPC stream will ensure that the server receives
+ the requests in the same order as they were sent from the
+ client.
+
+The HTTP/REST and GRPC protocols also provide endpoints to check
+server and model health, metadata and statistics. Additional
+endpoints allow model loading and unloading, and inferencing. See
+the KServe and extension documentation for details.
+
+### HTTP Options
+Triton provides the following configuration options for server-client network transactions over HTTP protocol.
+
+#### Compression
+
+Triton allows the on-wire compression of request/response on HTTP through its clients. See [HTTP Compression](https://github.com/triton-inference-server/client/tree/main#compression) for more details.
+
+### GRPC Options
+Triton exposes various GRPC parameters for configuring the server-client network transactions. For usage of these options, refer to the output from `tritonserver --help`.
+
+#### SSL/TLS
+
+These options can be used to configure a secured channel for communication. The server-side options include:
+
+* `--grpc-use-ssl`
+* `--grpc-use-ssl-mutual`
+* `--grpc-server-cert`
+* `--grpc-server-key`
+* `--grpc-root-cert`
+
+For client-side documentation, see [Client-Side GRPC SSL/TLS](https://github.com/triton-inference-server/client/tree/main#ssltls)
+
+For an overview of authentication in gRPC, refer to the [gRPC authentication guide](https://grpc.io/docs/guides/auth/).
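+
+As a sketch, a server-side TLS configuration might combine the options above
+as follows (the certificate and key paths are placeholders; check
+`tritonserver --help` for the exact value syntax):
+
+```
+tritonserver --model-repository=/models \
+    --grpc-use-ssl=true \
+    --grpc-server-cert=/certs/server.crt \
+    --grpc-server-key=/certs/server.key \
+    --grpc-root-cert=/certs/ca.crt
+```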
+
+#### Compression
+
+Triton allows the on-wire compression of request/response messages by exposing following option on server-side:
+
+* `--grpc-infer-response-compression-level`
+
+For client-side documentation, see [Client-Side GRPC Compression](https://github.com/triton-inference-server/client/tree/main#compression-1)
+
+Compression can be used to reduce the amount of bandwidth used in server-client communication. For more details, see [gRPC Compression](https://grpc.github.io/grpc/core/md_doc_compression.html).
+
+#### GRPC KeepAlive
+
+Triton exposes GRPC KeepAlive parameters with the default values for both
+client and server described [here](https://github.com/grpc/grpc/blob/master/doc/keepalive.md).
+
+These options can be used to configure the KeepAlive settings:
+
+* `--grpc-keepalive-time`
+* `--grpc-keepalive-timeout`
+* `--grpc-keepalive-permit-without-calls`
+* `--grpc-http2-max-pings-without-data`
+* `--grpc-http2-min-recv-ping-interval-without-data`
+* `--grpc-http2-max-ping-strikes`
+
+For client-side documentation, see [Client-Side GRPC KeepAlive](https://github.com/triton-inference-server/client/blob/main/README.md#grpc-keepalive).
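+
+As an illustrative sketch (the values are arbitrary and use the units reported
+by `tritonserver --help`), a server tuned for long-lived client streams might
+set:
+
+```
+tritonserver --model-repository=/models \
+    --grpc-keepalive-time=10000 \
+    --grpc-keepalive-timeout=5000 \
+    --grpc-http2-max-pings-without-data=2 \
+    --grpc-http2-min-recv-ping-interval-without-data=5000
+```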
+
+### Limit Endpoint Access (BETA)
+
+Triton users may want to restrict access to protocols or APIs that are
+provided by the GRPC or HTTP endpoints of a server. For example, users
+can provide one set of access credentials for inference APIs and
+another for model control APIs such as model loading and unloading.
+
+The following options can be specified to declare a restricted
+protocol group (GRPC) or restricted API group (HTTP):
+
+```
+--grpc-restricted-protocol=<protocol_1>,<protocol_2>,...:<restricted-key>=<restricted-value>
+--http-restricted-api=<API_1>,<API_2>,...:<restricted-key>=<restricted-value>
+```
+
+The option can be specified multiple times to specify multiple groups of
+protocols or APIs with different restriction settings.
+
+* `protocols / APIs` : A comma-separated list of protocols / APIs to be included in this
+group. Note that currently a given protocol / API is not allowed to be included in
+multiple groups. The following protocols / APIs are recognized:
+
+ * `health` : Health endpoint defined for [HTTP/REST](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#health) and [GRPC](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#health-1). For GRPC endpoint, this value also exposes [GRPC health check protocol](https://github.com/triton-inference-server/common/blob/main/protobuf/health.proto).
+ * `metadata` : Server / model metadata endpoints defined for [HTTP/REST](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#server-metadata) and [GRPC](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#server-metadata-1).
+ * `inference` : Inference endpoints defined for [HTTP/REST](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#inference) and [GRPC](https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#inference-1).
+ * `shared-memory` : [Shared-memory endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_shared_memory.md).
+ * `model-config` : [Model configuration endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_model_configuration.md).
+ * `model-repository` : [Model repository endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_model_repository.md).
+ * `statistics` : [statistics endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_statistics.md).
+ * `trace` : [trace endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_trace.md).
+ * `logging` : [logging endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_logging.md).
+
+* `restricted-key` : The GRPC / HTTP request header
+to be checked when a request is received. The
+completed header for GRPC will be in the form of
+`triton-grpc-protocol-<restricted-key>`. The completed header for HTTP
+will be in the form of `<restricted-key>`.
+
+* `restricted-value` : The header value required to access the specified protocols.
+
+#### Example
+
+To start the server with a set of protocols and APIs restricted for
+`admin` usage and the rest of the protocols and APIs left unrestricted
+use the following command line arguments:
+
+
+```
+tritonserver --grpc-restricted-protocol=shared-memory,model-config,model-repository,statistics,trace:<admin-key>=<admin-value> \
+             --http-restricted-api=shared-memory,model-config,model-repository,statistics,trace:<admin-key>=<admin-value> ...
+```
+
+GRPC requests to `admin` protocols require that an additional header
+`triton-grpc-protocol-<admin-key>` is provided with value
+`<admin-value>`. HTTP requests to `admin` APIs require that an
+additional header `<admin-key>` is provided with value `<admin-value>`.
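+
+For example, assuming the server above was started with the literal values
+`admin-key` and `admin-value`, an HTTP request to a restricted API such as the
+model configuration endpoint would need to carry that header (the model name
+is a placeholder):
+
+```
+curl -H "admin-key: admin-value" localhost:8000/v2/models/<model-name>/config
+```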
+
+
+## In-Process Triton Server API
+
+The Triton Inference Server provides a backwards-compatible C API that
+allows Triton to be linked directly into a C/C++ application. This API
+is called the "Triton Server API" or just "Server API" for short. The
+API is implemented in the Triton shared library which is built from
+source contained in the [core
+repository](https://github.com/triton-inference-server/core). On Linux
+this library is libtritonserver.so and on Windows it is
+tritonserver.dll. In the Triton Docker image the shared library is
+found in /opt/tritonserver/lib. The header file that defines and
+documents the Server API is
+[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).
+[Java bindings for In-Process Triton Server API](#java-bindings-for-in-process-triton-server-api)
+are built on top of `tritonserver.h` and can be used for Java applications that
+need to use Tritonserver in-process.
+
+All capabilities of Triton server are encapsulated in the shared
+library and are exposed via the Server API. The `tritonserver`
+executable implements HTTP/REST and GRPC endpoints and uses the Server
+API to communicate with core Triton logic. The primary source files
+for the endpoints are [grpc_server.cc](https://github.com/triton-inference-server/server/blob/main/src/grpc/grpc_server.cc) and
+[http_server.cc](https://github.com/triton-inference-server/server/blob/main/src/http_server.cc). In these source files you can
+see the Server API being used.
+
+You can use the Server API in your own application as well. A simple
+example using the Server API can be found in
+[simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc).
+
+### API Description
+
+Triton server functionality is encapsulated in a shared library which
+is built from source contained in the [core
+repository](https://github.com/triton-inference-server/core). You can
+include the full capabilities of Triton by linking the shared library
+into your application and by using the C API defined in
+[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).
+
+When you link the Triton shared library into your application you are
+*not* spawning a separate Triton process; instead, you are including
+the Triton core logic directly in your application. The Triton
+HTTP/REST or GRPC protocols are not used to communicate with this
+Triton core logic, instead all communication between your application
+and the Triton core logic must take place via the [Server
+API](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).
+
+The top-level abstraction used by Server API is `TRITONSERVER_Server`,
+which represents the Triton core logic that is capable of implementing
+all of the features and capabilities of Triton. A
+`TRITONSERVER_Server` object is created by calling
+`TRITONSERVER_ServerNew` with a set of options that indicate how the
+object should be initialized. Use of `TRITONSERVER_ServerNew` is
+demonstrated in [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc). Once you have created a
+`TRITONSERVER_Server` object, you can begin using the rest of the
+Server API as described below.
+
+#### Error Handling
+
+Most Server API functions return an error object indicating success or
+failure. Success is indicated by returning `nullptr` (`NULL`). Failure is
+indicated by returning a `TRITONSERVER_Error` object. The error code
+and message can be retrieved from a `TRITONSERVER_Error` object with
+`TRITONSERVER_ErrorCode` and `TRITONSERVER_ErrorMessage`.
+
+The lifecycle and ownership of all Server API objects is documented in
+[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h). For
+`TRITONSERVER_Error`, ownership of the object passes to the caller of
+the Server API function. As a result, your application is responsible
+for managing the lifecycle of the returned `TRITONSERVER_Error`
+object. You must delete the error object using
+`TRITONSERVER_ErrorDelete` when you are done using it. Macros such as
+`FAIL_IF_ERR` shown in [common.h](https://github.com/triton-inference-server/server/blob/main/src/common.h) are useful for
+managing error object lifetimes.
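+
+For illustration, a sketch of this pattern, using `TRITONSERVER_ServerIsLive`
+only as an example of a call that can fail:
+
+```
+// Check a Server API call for failure, report the error, and delete the
+// error object since ownership passed to the caller.
+bool live = false;
+TRITONSERVER_Error* err = TRITONSERVER_ServerIsLive(server, &live);
+if (err != nullptr) {
+  std::cerr << "error: " << TRITONSERVER_ErrorCodeString(err) << " - "
+            << TRITONSERVER_ErrorMessage(err) << std::endl;
+  TRITONSERVER_ErrorDelete(err);
+}
+```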
+
+#### Versioning and Backwards Compatibility
+
+A typical pattern, demonstrated in [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc) and
+shown below, shows how you can compare the Server API version provided
+by the shared library against the Server API version that you compiled
+your application against. The Server API is backwards compatible, so
+as long as the major version provided by the shared library matches
+the major version that you compiled against, and the minor version
+provided by the shared library is greater-than-or-equal to the minor
+version that you compiled against, then your application can use the
+Server API.
+
+```
+#include "tritonserver.h"
+// Error checking removed for clarity...
+uint32_t api_version_major, api_version_minor;
+TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor);
+if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major) ||
+ (TRITONSERVER_API_VERSION_MINOR > api_version_minor)) {
+ // Error, the shared library implementing the Server API is older than
+ // the version of the Server API that you compiled against.
+}
+```
+
+#### Non-Inference APIs
+
+The Server API contains functions for checking health and readiness,
+getting model information, getting model statistics and metrics,
+loading and unloading models, etc. The use of these functions is
+straightforward and some of these functions are demonstrated in
+[simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc) and all are documented in
+[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).
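+
+A sketch of a few of these calls is shown below; error handling is omitted and
+the model name and version are placeholders:
+
+```
+// Liveness and readiness checks.
+bool live = false, ready = false;
+TRITONSERVER_ServerIsLive(server, &live);
+TRITONSERVER_ServerIsReady(server, &ready);
+
+// Check whether a specific model version is ready ("my_model" is a
+// placeholder name).
+bool model_ready = false;
+TRITONSERVER_ServerModelIsReady(server, "my_model", 1 /* version */, &model_ready);
+
+// Server metadata is returned as a TRITONSERVER_Message owned by the caller.
+TRITONSERVER_Message* server_metadata = nullptr;
+TRITONSERVER_ServerMetadata(server, &server_metadata);
+const char* base = nullptr;
+size_t byte_size = 0;
+TRITONSERVER_MessageSerializeToJson(server_metadata, &base, &byte_size);
+// ... 'base' points to 'byte_size' bytes of JSON metadata ...
+TRITONSERVER_MessageDelete(server_metadata);
+```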
+
+#### Inference APIs
+
+Performing an inference request requires the use of many Server API
+functions and objects, as demonstrated in
+[simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc). The general usage requires the
+following steps; a condensed sketch that pulls them together is shown after the list.
+
+* Create a `TRITONSERVER_ResponseAllocator` using
+ `TRITONSERVER_ResponseAllocatorNew`. You can use the same response
+ allocator for all of your inference requests, or you can create
+ multiple response allocators. When Triton produces an output
+ tensor, it needs a memory buffer into which it can store the
+ contents of that tensor. Triton defers the allocation of these
+ output buffers by invoking callback functions in your
+ application. You communicate these callback functions to Triton with
+ the `TRITONSERVER_ResponseAllocator` object. You must implement two
+ callback functions, one for buffer allocation and one for buffer
+  release. The signatures for these functions are
+ `TRITONSERVER_ResponseAllocatorAllocFn_t` and
+ `TRITONSERVER_ResponseAllocatorReleaseFn_t` as defined in
+ [tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h). In
+ [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc), these callback functions are
+ implemented as `ResponseAlloc` and `ResponseRelease`.
+
+* Create an inference request as a `TRITONSERVER_InferenceRequest`
+ object. The inference request is where you specify what model you
+ want to use, the input tensors and their values, the output tensors
+ that you want returned, and other request parameters. You create an
+ inference request using `TRITONSERVER_InferenceRequestNew`. You
+ create each input tensor in the request using
+ `TRITONSERVER_InferenceRequestAddInput` and set the data for the
+ input tensor using `TRITONSERVER_InferenceRequestAppendInputData`
+ (or one of the `TRITONSERVER_InferenceRequestAppendInputData*`
+ variants defined in
+ [tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h)). By
+ default, Triton will return all output tensors, but you can limit
+ Triton to only return some outputs by using
+ `TRITONSERVER_InferenceRequestAddRequestedOutput`.
+
+ To correctly manage the lifecycle of the inference request, you must
+ use `TRITONSERVER_InferenceRequestSetReleaseCallback` to set a
+ callback into a function in your application. This callback will be
+  invoked by Triton to return ownership of the
+ `TRITONSERVER_InferenceRequest` object. Typically, in this callback
+ you will just delete the `TRITONSERVER_InferenceRequest` object by
+ using `TRITONSERVER_InferenceRequestDelete`. But you may also
+  implement different lifecycle management; for example, if you are
+ reusing inference request objects you would want to make the object
+ available for reuse.
+
+ You can optionally use `TRITONSERVER_InferenceRequestSetId` to set a
+ user-defined ID on the request. This ID is not used by Triton but
+ will be returned in the response.
+
+ You can reuse an existing `TRITONSERVER_InferenceRequest` object for
+ a new inference request. A couple of examples of how this is done
+ and why it is useful are shown in [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc).
+
+* Ask Triton to execute the inference request using
+ `TRITONSERVER_ServerInferAsync`. `TRITONSERVER_ServerInferAsync` is
+  an asynchronous call that returns immediately. The inference response
+ is returned via a callback into your application. You register this
+ callback using `TRITONSERVER_InferenceRequestSetResponseCallback`
+ before you invoke `TRITONSERVER_ServerInferAsync`. In
+ [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc) this callback is
+ `InferResponseComplete`.
+
+ When you invoke `TRITONSERVER_ServerInferAsync` and it returns
+ without error, you are passing ownership of the
+ `TRITONSERVER_InferenceRequest` object to Triton, and so you must
+ not access that object in any way until Triton returns ownership to
+ you via the callback you registered with
+ `TRITONSERVER_InferenceRequestSetReleaseCallback`.
+
+* Process the inference response. The inference response is returned
+ to the callback function you registered with
+ `TRITONSERVER_InferenceRequestSetResponseCallback`. Your callback
+ receives the response as a `TRITONSERVER_InferenceResponse`
+ object. Your callback takes ownership of the
+ `TRITONSERVER_InferenceResponse` object and so must free it with
+ `TRITONSERVER_InferenceResponseDelete` when it is no longer needed.
+
+ The first step in processing a response is to use
+ `TRITONSERVER_InferenceResponseError` to check if the response is
+ returning an error or if it is returning valid results. If the
+ response is valid you can use
+ `TRITONSERVER_InferenceResponseOutputCount` to iterate over the
+ output tensors, and `TRITONSERVER_InferenceResponseOutput` to get
+ information about each output tensor.
+
+ Note that the [simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc) example uses a
+  `std::promise` to simply wait for the response, but synchronizing
+ response handling in this way is not required. You can have multiple
+ inference requests in flight at the same time and can issue
+ inference requests from the same thread or from multiple different
+ threads.
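+
+The following condensed sketch pulls these steps together. It loosely follows
+[simple.cc](https://github.com/triton-inference-server/server/blob/main/src/simple.cc):
+the model name, input/output names, shape, and input buffer are placeholders,
+the callback implementations (`ResponseAlloc`, `ResponseRelease`,
+`InferRequestComplete`, `InferResponseComplete`) are only referenced, and
+error checking is omitted.
+
+```
+// Response allocator with application-provided allocation/release callbacks.
+TRITONSERVER_ResponseAllocator* allocator = nullptr;
+TRITONSERVER_ResponseAllocatorNew(
+    &allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */);
+
+// Build the inference request. Version -1 selects the latest model version.
+TRITONSERVER_InferenceRequest* irequest = nullptr;
+TRITONSERVER_InferenceRequestNew(&irequest, server, "my_model", -1);
+TRITONSERVER_InferenceRequestSetId(irequest, "request_0");
+TRITONSERVER_InferenceRequestSetReleaseCallback(
+    irequest, InferRequestComplete, nullptr /* request_release_userp */);
+
+const int64_t shape[] = {1, 16};
+TRITONSERVER_InferenceRequestAddInput(
+    irequest, "INPUT0", TRITONSERVER_TYPE_FP32, shape, 2 /* dim_count */);
+TRITONSERVER_InferenceRequestAppendInputData(
+    irequest, "INPUT0", input_data, input_byte_size,
+    TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */);
+TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, "OUTPUT0");
+
+// Register the response callback and execute. The std::promise is just one
+// way to wait for the single response; it is not required.
+auto completed = new std::promise<TRITONSERVER_InferenceResponse*>();
+std::future<TRITONSERVER_InferenceResponse*> fut = completed->get_future();
+TRITONSERVER_InferenceRequestSetResponseCallback(
+    irequest, allocator, nullptr /* response_allocator_userp */,
+    InferResponseComplete, completed /* response_userp */);
+TRITONSERVER_ServerInferAsync(server, irequest, nullptr /* trace */);
+
+// InferResponseComplete is assumed to fulfill the promise with the response.
+TRITONSERVER_InferenceResponse* response = fut.get();
+delete completed;
+
+// Check the response for an error before reading outputs. The returned
+// error object, if any, is owned by the caller and must be deleted.
+TRITONSERVER_Error* response_err = TRITONSERVER_InferenceResponseError(response);
+if (response_err == nullptr) {
+  uint32_t output_count = 0;
+  TRITONSERVER_InferenceResponseOutputCount(response, &output_count);
+  // ... TRITONSERVER_InferenceResponseOutput(...) for each output tensor ...
+} else {
+  TRITONSERVER_ErrorDelete(response_err);
+}
+TRITONSERVER_InferenceResponseDelete(response);
+```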
+
+## Java bindings for In-Process Triton Server API
+
+The Triton Inference Server uses [JavaCPP](https://github.com/bytedeco/javacpp)
+to generate Java bindings around the Tritonserver C API.
+
+The API is documented in
+[tritonserver.java](https://github.com/bytedeco/javacpp-presets/blob/master/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java).
+Alternatively, the user can refer to the web version [API docs](http://bytedeco.org/javacpp-presets/tritonserver/apidocs/)
+generated from `tritonserver.java`.
+**Note:** Currently, `tritonserver.java` contains bindings for both the `In-process C-API`
+and the `C-API Wrapper`. More information about the [developer_tools/server C-API wrapper](https://github.com/triton-inference-server/developer_tools/blob/main/server/README.md) can be found in the [developer_tools repository](https://github.com/triton-inference-server/developer_tools/).
+
+A simple example using the Java API can be found in the
+[Samples folder](https://github.com/bytedeco/javacpp-presets/tree/master/tritonserver/samples),
+which includes `Simple.java`, similar to
+[`simple.cc`](https://github.com/triton-inference-server/server/blob/main/src/simple.cc).
+Please refer to
+[sample usage documentation](https://github.com/bytedeco/javacpp-presets/tree/master/tritonserver#sample-usage)
+to learn how to build and run `Simple.java`.
+
+In the [QA folder](https://github.com/triton-inference-server/server/blob/main/qa), folders starting with `L0_java` include Java API tests.
+These can be useful references for getting started, such as the
+[ResNet50 test](https://github.com/triton-inference-server/server/blob/main/qa/L0_java_resnet).
+
+### Java API setup instructions
+
+To use the Tritonserver Java API, you will need to have the Tritonserver library
+and dependencies installed in your environment. There are two ways to do this:
+
+1. Use a Tritonserver Docker container with
+    1. the `.jar` Java bindings to the C API (recommended), or
+    2. Maven, building the bindings yourself
+2. Build Triton in your environment without Docker (not recommended)
+
+#### Run Tritonserver container and install dependencies
+
+To set up your environment with the Triton Java API, follow these steps:
+1. First, run the Docker container:
+```bash
+ $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:-py3 bash
+```
+2. Install `jdk`:
+```bash
+ $ apt update && apt install -y openjdk-11-jdk
+```
+3. Install `maven` (only if you want to build the bindings yourself):
+```bash
+ $ cd /opt/tritonserver
+ $ wget https://archive.apache.org/dist/maven/maven-3/3.8.4/binaries/apache-maven-3.8.4-bin.tar.gz
+ $ tar zxvf apache-maven-3.8.4-bin.tar.gz
+ $ export PATH=/opt/tritonserver/apache-maven-3.8.4/bin:$PATH
+```
+
+#### Run Java program with Java bindings Jar
+
+After ensuring that Tritonserver and dependencies are installed, you can run your
+Java program with the Java bindings with the following steps:
+
+1. Place Java bindings into your environment. You can do this by either:
+
+ a. Building Java API bindings with provided build script:
+ ```bash
+ # Clone Triton client repo. Recommended client repo tag is: main
+ $ git clone --single-branch --depth=1 -b
+ https://github.com/triton-inference-server/client.git clientrepo
+ # Run build script
+ ## For In-Process C-API Java Bindings
+ $ source clientrepo/src/java-api-bindings/scripts/install_dependencies_and_build.sh
+ ## For C-API Wrapper (Triton with C++ bindings) Java Bindings
+ $ source clientrepo/src/java-api-bindings/scripts/install_dependencies_and_build.sh --enable-developer-tools-server
+ ```
+ This will install the Java bindings to `/workspace/install/java-api-bindings/tritonserver-java-bindings.jar`
+
+ *or*
+
+ b. Copying "Uber Jar" from Triton SDK container to your environment
+ ```bash
+ $ id=$(docker run -dit nvcr.io/nvidia/tritonserver:-py3-sdk bash)
+ $ docker cp ${id}:/workspace/install/java-api-bindings/tritonserver-java-bindings.jar /tritonserver-java-bindings.jar
+ $ docker stop ${id}
+ ```
+ **Note:** `tritonserver-java-bindings.jar` only includes the `In-Process Java Bindings`. To use the `C-API Wrapper Java Bindings`, please use the build script.
+2. Use the built "Uber Jar" that contains the Java bindings
+ ```bash
+ $ java -cp /tritonserver-java-bindings.jar
+ ```
+
+#### Build Java bindings and run Java program with Maven
+
+If you want to make changes to the Java bindings, you can use Maven to
+build them yourself. You can refer to part 1.a of [Run Java program with Java
+bindings Jar](#run-java-program-with-java-bindings-jar) to build the jar
+without any modifications to the Tritonserver bindings in
+JavaCPP-presets.
+To build with your own changes, use the following steps:
+
+1. Create the JNI binaries in your local repository (`/root/.m2/repository`)
+ with [`javacpp-presets/tritonserver`](https://github.com/bytedeco/javacpp-presets/tree/master/tritonserver).
+ For C-API Wrapper Java bindings (Triton with C++ bindings), you need to
+   install some build-specific dependencies, including CMake and RapidJSON.
+   Refer to the [java installation script](https://github.com/triton-inference-server/client/blob/main/src/java-api-bindings/scripts/install_dependencies_and_build.sh)
+   for the dependencies you need to install and the modifications you need to make for your container.
+After installing the dependencies, you can build the tritonserver project in javacpp-presets:
+```bash
+ $ git clone https://github.com/bytedeco/javacpp-presets.git
+ $ cd javacpp-presets
+ $ mvn clean install --projects .,tritonserver
+ $ mvn clean install -f platform --projects ../tritonserver/platform -Djavacpp.platform=linux-x86_64
+```
+2. Create your custom `*.pom` file for Maven. Please refer to
+ [samples/simple/pom.xml](https://github.com/bytedeco/javacpp-presets/blob/master/tritonserver/samples/simple/pom.xml) as
+ reference for how to create your pom file.
+3. After creating your `pom.xml` file you can build your application with:
+```bash
+ $ mvn compile exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args=""
+```
diff --git a/docs/customization_guide/repository_agents.md b/docs/customization_guide/repository_agents.md
new file mode 100644
index 0000000000..02fb1d57ec
--- /dev/null
+++ b/docs/customization_guide/repository_agents.md
@@ -0,0 +1,176 @@
+
+
+# Repository Agent
+
+A *repository agent* extends Triton with new functionality that
+operates when a model is loaded or unloaded. You can introduce your
+own code to perform authentication, decryption, conversion, or similar
+operations when a model is loaded.
+
+**BETA: The repository agent API is beta quality and is subject to
+non-backward-compatible changes for one or more releases.**
+
+A repository agent communicates with Triton using the [repository agent
+API](https://github.com/triton-inference-server/core/tree/main/include/triton/core/tritonrepoagent.h). The
+[checksum_repository_agent GitHub
+repo](https://github.com/triton-inference-server/checksum_repository_agent)
+provides an example repository agent that verifies file checksums
+before loading a model.
+
+## Using a Repository Agent
+
+A model can use one or more repository agents by specifying them in
+the *ModelRepositoryAgents* section of the [model
+configuration](../user_guide/model_configuration.md). Each repository agent can have
+parameters specific to that agent that are specified in the model
+configuration to control the behavior of the agent. To understand the
+parameters available for a given agent consult the documentation for
+that agent.
+
+Multiple agents may be specified for the same model and they will be
+invoked in order when a model is loaded or unloaded. The following
+example model configuration shows how two agents, "agent0"
+and "agent1", are specified so that they are invoked in that order
+with the given parameters.
+
+```
+model_repository_agents
+{
+ agents [
+ {
+ name: "agent0",
+ parameters [
+ {
+ key: "key0",
+ value: "value0"
+ },
+ {
+ key: "key1",
+ value: "value1"
+ }
+ ]
+ },
+ {
+ name: "agent1",
+ parameters [
+ {
+ key: "keyx",
+ value: "valuex"
+ }
+ ]
+ }
+ ]
+}
+```
+
+## Implementing a Repository Agent
+
+A repository agent must be implemented as a shared library and the
+name of the shared library must be
+*libtritonrepoagent_\.so*. The shared library should
+hide all symbols except those needed by the repository agent API. See
+the [checksum example's
+CMakeList.txt](https://github.com/triton-inference-server/checksum_repository_agent/blob/main/CMakeLists.txt)
+for an example of how to use an ldscript to expose only the necessary
+symbols.
+
+The shared library will be dynamically loaded by Triton when it is
+needed. For a repository agent called *A*, the shared library must be
+installed as \/A/libtritonrepoagent_A.so,
+where \ is by default
+/opt/tritonserver/repoagents. The --repoagent-directory flag can be
+used to override the default.
+
+Your repository agent must implement the repository agent API as
+documented in
+[tritonrepoagent.h](https://github.com/triton-inference-server/core/tree/main/include/triton/core/tritonrepoagent.h).
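+
+As a minimal sketch (not the complete checksum example), the core of an agent
+is a `TRITONREPOAGENT_ModelAction` implementation exported from the shared
+library; a real agent would examine the model's repository location and its
+configured parameters here:
+
+```
+#include "triton/core/tritonrepoagent.h"
+
+extern "C" {
+
+// Called by Triton at each point in the model load/unload lifecycle
+// described below.
+TRITONSERVER_Error*
+TRITONREPOAGENT_ModelAction(
+    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
+    const TRITONREPOAGENT_ActionType action_type)
+{
+  switch (action_type) {
+    case TRITONREPOAGENT_ACTION_LOAD:
+      // Inspect (or replace) the model repository before the model loads.
+      // Returning nullptr indicates success; returning an error created
+      // with TRITONSERVER_ErrorNew causes the model load to fail.
+      return nullptr;
+    default:
+      // No action needed for the LOAD_COMPLETE, LOAD_FAIL, UNLOAD and
+      // UNLOAD_COMPLETE notifications in this sketch.
+      return nullptr;
+  }
+}
+
+}  // extern "C"
+```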
+
+Triton follows these steps when loading a model:
+
+* Load the model's configuration file (config.pbtxt) and extract the
+ *ModelRepositoryAgents* settings. Even if a repository agent
+ modifies the config.pbtxt file, the repository agent settings from
+ the initial config.pbtxt file are used for the entire loading
+ process.
+
+* For each repository agent specified:
+
+ * Initialize the corresponding repository agent, loading the shared
+ library if necessary. Model loading fails if the shared library is
+ not available or if initialization fails.
+
+ * Invoke the repository agent's *TRITONREPOAGENT_ModelAction*
+ function with action TRITONREPOAGENT_ACTION_LOAD. As input the
+ agent can access the model's repository as either a cloud storage
+ location or a local filesystem location.
+
+ * The repository agent can return *success* to indicate that no
+    changes were made to the repository, can return *failure* to
+ indicate that the model load should fail, or can create a new
+ repository for the model (for example, by decrypting the input
+ repository) and return *success* to indicate that the new
+ repository should be used.
+
+  * If the agent returns *success*, Triton continues to the next
+ agent. If the agent returns *failure*, Triton skips invocation of
+ any additional agents.
+
+* If all agents returned *success*, Triton attempts to load the model
+ using the final model repository.
+
+* For each repository agent that was invoked with
+ TRITONREPOAGENT_ACTION_LOAD, in reverse order:
+
+ * Triton invokes the repository agent's
+ *TRITONREPOAGENT_ModelAction* function with action
+ TRITONREPOAGENT_ACTION_LOAD_COMPLETE if the model loaded
+ successfully or TRITONREPOAGENT_ACTION_LOAD_FAIL if the model
+ failed to load.
+
+Triton follows these steps when unloading a model:
+
+* Triton uses the repository agent settings from the initial
+ config.pbtxt file, even if during loading one or more agents
+ modified its contents.
+
+* For each repository agent that was invoked with
+ TRITONREPOAGENT_ACTION_LOAD, in the same order:
+
+ * Triton invokes the repository agent's
+ *TRITONREPOAGENT_ModelAction* function with action
+ TRITONREPOAGENT_ACTION_UNLOAD.
+
+* Triton unloads the model.
+
+* For each repository agent that was invoked with
+ TRITONREPOAGENT_ACTION_UNLOAD, in reverse order:
+
+ * Triton invokes the repository agent's
+ *TRITONREPOAGENT_ModelAction* function with action
+ TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE.
diff --git a/docs/customization_guide/test.md b/docs/customization_guide/test.md
new file mode 100644
index 0000000000..a64d81a27f
--- /dev/null
+++ b/docs/customization_guide/test.md
@@ -0,0 +1,134 @@
+
+
+# Testing Triton
+
+Currently there is no CI testing enabled for Triton repositories. We
+will enable CI testing in a future update.
+
+However, there is a set of tests in the qa/ directory that can be run
+manually to provide extensive testing. Before running these tests you
+must first generate a few model repositories containing the models
+needed by the tests.
+
+## Generate QA Model Repositories
+
+The QA model repositories contain some simple models that are used to
+verify the correctness of Triton. To generate the QA model
+repositories:
+
+```
+$ cd qa/common
+$ ./gen_qa_model_repository
+$ ./gen_qa_custom_ops
+```
+
+This will create multiple model repositories in /tmp//qa_*
+(for example /tmp/24.03/qa_model_repository). The TensorRT models
+will be created for the GPU on the system that CUDA considers device 0
+(zero). If you have multiple GPUs on your system see the documentation
+in the scripts for how to target a specific GPU.
+
+## Build SDK Image
+
+Build the *tritonserver_sdk* image that contains the client
+libraries, model analyzer, and examples using the following
+commands. You must first check out the branch of the
+*client* repo into the clientrepo/ subdirectory. Typically you want
+it to be the same as your current server branch.
+
+```
+$ cd
+$ git clone --single-branch --depth=1 -b https://github.com/triton-inference-server/client.git clientrepo
+$ docker build -t tritonserver_sdk -f Dockerfile.sdk .
+```
+
+## Build QA Image
+
+Next you need to build a QA version of the Triton Docker image. This
+image will contain Triton, the QA tests, and all the dependencies
+needed to run the QA tests. First do a [Docker image
+build](build.md#building-with-docker) to produce the
+*tritonserver_cibase* and *tritonserver* images.
+
+Then, build the actual QA image.
+
+```
+$ docker build -t tritonserver_qa -f Dockerfile.QA .
+```
+
+## Run QA Tests
+
+Now run the QA image and mount the QA model repositories into the
+container so the tests will be able to access them.
+
+```
+$ docker run --gpus=all -it --rm -v/tmp:/data/inferenceserver tritonserver_qa
+```
+
+Within the container the QA tests are in /opt/tritonserver/qa. To run
+a test, change directory to the test and run the test.sh script.
+
+```
+$ cd
+$ bash -x ./test.sh
+```
+
+### Sanity Tests
+
+Many tests require that you use a complete Triton build, with all
+backends and other features enabled. There are three sanity tests that
+are parameterized so that you can run them even if you have built a
+Triton that contains only a subset of all supported Triton
+backends. These tests are L0_infer, L0_batcher and
+L0_sequence_batcher. For these tests the following envvars are
+available to control how the tests behave:
+
+* BACKENDS: Control which backends are tested. Look in the test.sh
+ file of the test to see the default and allowed values.
+
+* ENSEMBLES: Enable testing of ensembles. Set to "0" to disable, set
+ to "1" to enable. If enabled you must have the *identity* backend
+ included in your Triton build.
+
+* EXPECTED_NUM_TESTS: The tests perform a check of the total number of
+ test sub-cases. The exact number of sub-cases that run will depend
+ on the values you use for BACKENDS and ENSEMBLES. So you will need
+ to adjust this as appropriate for your testing.
+
+For example, if you build a Triton that has only the TensorRT backend
+you can run L0_infer as follows:
+
+```
+$ BACKENDS="plan" ENSEMBLES=0 EXPECTED_NUM_TESTS= bash -x ./test.sh
+```
+
+Where '\' is the number of sub-tests expected to be run for
+just TensorRT testing and no ensembles. Depending on which backend(s)
+you are testing you will need to experiment and determine the correct
+value for '\'.
diff --git a/docs/examples/README.md b/docs/examples/README.md
new file mode 100644
index 0000000000..84bfcb9499
--- /dev/null
+++ b/docs/examples/README.md
@@ -0,0 +1,35 @@
+
+
+# Triton Examples
+
+**New to Triton Inference Server?** Make use of [these tutorials](https://github.com/triton-inference-server/tutorials) to begin your Triton journey!
+
+This folder contains the following:
+* jetson: This covers deploying Triton Inference Server on Jetson devices.
+* model_repository: This folder is a basic model repository for deploying models using the Triton Inference Server.
\ No newline at end of file
diff --git a/docs/examples/fetch_models.sh b/docs/examples/fetch_models.sh
index 0612dfc6cb..5594878b3e 100755
--- a/docs/examples/fetch_models.sh
+++ b/docs/examples/fetch_models.sh
@@ -27,16 +27,14 @@
set -ex
-# Caffe2 resnet50
-mkdir -p model_repository/resnet50_netdef/1
-wget -O model_repository/resnet50_netdef/1/model.netdef \
- http://download.caffe2.ai.s3.amazonaws.com/models/resnet50/predict_net.pb
-wget -O model_repository/resnet50_netdef/1/init_model.netdef \
- http://download.caffe2.ai.s3.amazonaws.com/models/resnet50/init_net.pb
-
# TensorFlow inception
mkdir -p model_repository/inception_graphdef/1
wget -O /tmp/inception_v3_2016_08_28_frozen.pb.tar.gz \
https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz
(cd /tmp && tar xzf inception_v3_2016_08_28_frozen.pb.tar.gz)
mv /tmp/inception_v3_2016_08_28_frozen.pb model_repository/inception_graphdef/1/model.graphdef
+
+# ONNX densenet
+mkdir -p model_repository/densenet_onnx/1
+wget -O model_repository/densenet_onnx/1/model.onnx \
+ https://contentmamluswest001.blob.core.windows.net/content/14b2744cf8d6418c87ffddc3f3127242/9502630827244d60a1214f250e3bbca7/08aed7327d694b8dbaee2c97b8d0fcba/densenet121-1.2.onnx
diff --git a/docs/examples/jetson/README.md b/docs/examples/jetson/README.md
new file mode 100644
index 0000000000..281d5f2a97
--- /dev/null
+++ b/docs/examples/jetson/README.md
@@ -0,0 +1,68 @@
+
+
+# Using Triton Inference Server as a shared library for execution on Jetson
+
+## Overview
+This project demonstrates how to run C API applications using Triton Inference Server as a shared library. We also show how to build and execute such applications on Jetson.
+
+### Prerequisites
+
+* JetPack >= 4.6
+* OpenCV >= 4.1.1
+* TensorRT >= 8.0.1.6
+
+### Installation
+
+Follow the installation instructions from the GitHub release page ([https://github.com/triton-inference-server/server/releases/](https://github.com/triton-inference-server/server/releases/)).
+
+In our example, we placed the contents of the downloaded release directory under `/opt/tritonserver`.
+
+## Part 1. Concurrent inference and dynamic batching
+
+The purpose of the sample located under [concurrency_and_dynamic_batching](concurrency_and_dynamic_batching/README.md)
+is to demonstrate the important features of Triton Inference Server such as concurrent model execution and
+dynamic batching. In order to do that, we implemented a people detection application using the C API and Triton
+Inference Server as a shared library.
+
+## Part 2. Analyzing model performance with perf_analyzer
+
+To analyze model performance on Jetson, the
+[perf_analyzer](https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/README.md)
+tool is used. `perf_analyzer` is included in the release tar file or can be
+compiled from source.
+
+From this directory of the repository, execute the following to evaluate model performance:
+
+```shell
+./perf_analyzer -m peoplenet -b 2 --service-kind=triton_c_api --model-repo=$(pwd)/concurrency_and_dynamic_batching/trtis_model_repo_sample_1 --triton-server-directory=/opt/tritonserver --concurrency-range 1:6 -f perf_c_api.csv
+```
+
+In the example above we saved the results as a `.csv` file. To visualize these
+results, follow the steps described
+[here](https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/README.md).
diff --git a/docs/examples/jetson/concurrency_and_dynamic_batching/Makefile b/docs/examples/jetson/concurrency_and_dynamic_batching/Makefile
new file mode 100644
index 0000000000..6506314999
--- /dev/null
+++ b/docs/examples/jetson/concurrency_and_dynamic_batching/Makefile
@@ -0,0 +1,47 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+TARGET=people_detection
+GCC=g++
+GCC_PARMS+=-I../../server -I/usr/include/opencv4 -I../../core/include/ -I/usr/local/cuda/targets/aarch64-linux/include
+GCC_PARMS+=-I${HOME}/tritonserver/include/tritonserver -D TRITON_ENABLE_GPU=ON -D TRITON_MIN_COMPUTE_CAPABILITY=5.3
+
+GCC_LIBS=-L${HOME}/tritonserver/lib -L/usr/lib -L/usr/local/cuda/targets/aarch64-linux/lib
+GCC_LIBS+=-lpthread -ltritonserver -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_imgcodecs -lopencv_dnn -lcudart
+
+all: $(TARGET)
+
+
+%.o: %.cc
+ $(GCC) $(GCC_PARMS) -c -g -o $@ $^
+
+$(TARGET): $(TARGET).o
+ $(GCC) $^ $(GCC_LIBS) -o $@
+
+clean:
+ rm -f $(TARGET).o $(TARGET)
+
+.PHONY: all clean
diff --git a/docs/examples/jetson/concurrency_and_dynamic_batching/README.md b/docs/examples/jetson/concurrency_and_dynamic_batching/README.md
new file mode 100644
index 0000000000..1f96dd365d
--- /dev/null
+++ b/docs/examples/jetson/concurrency_and_dynamic_batching/README.md
@@ -0,0 +1,331 @@
+
+
+# Concurrent inference and dynamic batching
+
+The purpose of this sample is to demonstrate the important features of Triton Inference Server such as concurrent model execution and dynamic batching.
+
+We will be using a purpose built deployable people detection model, which we download from [Nvidia GPU Cloud (NGC)](https://ngc.nvidia.com/).
+
+## Acquiring the model
+
+Download the pruned [PeopleNet](https://ngc.nvidia.com/catalog/models/nvidia:tlt_peoplenet) model from NGC. This model is available as a ready-to-use model, and you can download it from NGC using either the `wget` method:
+
+```shell
+wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/tao/peoplenet/versions/pruned_v2.1/zip -O pruned_v2.1.zip
+```
+
+or via CLI command:
+
+```shell
+ngc registry model download-version "nvidia/tao/peoplenet:pruned_v2.1"
+```
+
+For the latter, you need to set up the [NGC CLI](https://ngc.nvidia.com/setup).
+
+Having downloaded the model from the NGC, unzip the archive `peoplenet_pruned_v2.1.zip` into `concurrency_and_dynamic_batching/tao/models/peoplenet`.
+
+If you have the zip archive in the `concurrency_and_dynamic_batching` directory, the following will automatically place the model in the correct location:
+
+```shell
+unzip pruned_v2.1.zip -d $(pwd)/tao/models/peoplenet
+```
+
+Verify that you can see the model file `resnet34_peoplenet_pruned.etlt` under
+
+```
+concurrency_and_dynamic_batching
+└── tao
+    └── models
+        └── peoplenet
+            ├── labels.txt
+            └── resnet34_peoplenet_pruned.etlt
+```
+
+## Converting the model to TensorRT
+
+After you have acquired the model file in `.etlt` format, you will need to convert the model to [TensorRT](https://developer.nvidia.com/tensorrt) format. NVIDIA TensorRT is an SDK for high-performance deep learning inference. It includes a deep learning inference optimizer and runtime that delivers low latency and high throughput for deep learning inference applications. The latest versions of JetPack include TensorRT.
+
+In order to convert an `.etlt` model to TensorRT format, you need to use the `tao-converter` tool.
+
+The `tao-converter` tool is available as a compiled release file for different platforms. The download links corresponding to your deployment system are provided among the [TLT Getting Started resources](https://developer.nvidia.com/tlt-get-started).
+
+After you have downloaded `tao-converter`, you might need to execute
+
+```shell
+chmod 777 tao-converter
+```
+
+in the directory with the tool.
+
+We provide a conversion script, `tao/convert_peoplenet.sh`, which expects the model to be present at the following location:
+
+```shell
+tao
+└── models
+    └── peoplenet
+```
+
+To execute it, you can place the `tao-converter` executable in the `tao` directory of the project and, in the same directory, run
+
+```shell
+bash convert_peoplenet.sh
+```
+
+After you execute it, verify that a `model.plan` file was placed in the directories `/trtis_model_repo_sample_1/peoplenet/1` and `/trtis_model_repo_sample_2/peoplenet/1`. Note that we have two slightly different repositories for the same model to demonstrate different features of Triton.
+
+Also note that this step has to be performed on the target hardware: if you are planning to execute this application on Jetson, the conversion has to be performed on Jetson.
+
+To learn more about `tao-converter` parameters, run:
+
+```shell
+./tao-converter -h
+```
+
+## Building the app
+
+To compile the sample, pull the following repositories:
+* [https://github.com/triton-inference-server/server](https://github.com/triton-inference-server/server)
+* [https://github.com/triton-inference-server/core](https://github.com/triton-inference-server/core)
+
+Make sure you copied the contents of the release you downloaded to `$HOME`:
+
+```shell
+sudo cp -rf tritonserver2.x.y-jetpack4.6 $HOME/tritonserver
+```
+
+Open the terminal in `concurrency_and_dynamic_batching` and build the app by executing
+
+```shell
+make
+```
+
+An example Makefile is provided for Jetson.
+
+## Demonstration case 1: Concurrent model execution
+
+With Triton Inference Server, multiple models (or multiple instances of the same model) can run simultaneously on the same GPU or on multiple GPUs. In this example, we are demonstrating how to run multiple instances of the same model on a single Jetson GPU.
+
+### Running the sample
+
+To execute from the terminal, run from the `concurrency_and_dynamic_batching` directory:
+
+```shell
+LD_LIBRARY_PATH=$HOME/tritonserver/lib ./people_detection -m system -v -r $(pwd)/trtis_model_repo_sample_1 -t 6 -s false -p $HOME/tritonserver
+```
+
+The parameter `-t` controls the number of concurrent inference calls we want to execute. We will be executing the same model on the same sample image with the purpose of demonstrating how setting different concurrency options affects the performance.
+
+You can enable saving detected bounding boxes in the project directory in the form of overlays over the original image for each execution thread. You can turn the visualization on by setting the parameter `-s` to `true` upon execution (`-s` is set to `false` by default).
+
+### Expected output
+
+Upon execution, in the terminal log you will see _Model 'peoplenet' Stats_ in JSON format reflecting the inference performance. We also output _TOTAL INFERENCE TIME_, which simply reflects the elapsed time required to run the application, including data loading, pre-processing, and post-processing.
+
+A typical output in the log for _Model 'peoplenet' Stats_ looks as follows:
+
+```json
+{
+ "model_stats":[
+ {
+ "name":"peoplenet",
+ "version":"1",
+ "last_inference":1626448309997,
+ "inference_count":6,
+ "execution_count":6,
+ "inference_stats":{
+ "success":{
+ "count":6,
+ "ns":574589968
+ },
+ "fail":{
+ "count":0,
+ "ns":0
+ },
+ "queue":{
+ "count":6,
+ "ns":234669630
+ },
+ "compute_input":{
+ "count":6,
+ "ns":194884512
+ },
+ "compute_infer":{
+ "count":6,
+ "ns":97322636
+ },
+ "compute_output":{
+ "count":6,
+ "ns":47700806
+ }
+ },
+ "batch_stats":[
+ {
+ "batch_size":1,
+ "compute_input":{
+ "count":6,
+ "ns":194884512
+ },
+ "compute_infer":{
+ "count":6,
+ "ns":97322636
+ },
+ "compute_output":{
+ "count":6,
+ "ns":47700806
+ }
+ }
+ ]
+ }
+ ]
+}
+
+"TOTAL INFERENCE TIME: 174ms"
+```
+
+To learn about the different statistics, check out the [documentation](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_statistics.md#statistics-extension).
+
+To see how setting different values for concurrency affects total execution time and its components reflected in the model stats, you need to modify a single parameter in the model config file.
+
+To enable concurrent model execution support for a model, the corresponding model config file `trtis_model_repo_sample_1/peoplenet/config.pbtxt` includes the following:
+
+```
+instance_group [
+ {
+ count: 3
+ kind: KIND_GPU
+ }
+]
+```
+
+You can change the instance `count` for the model and observe how it affects performance in _Model 'peoplenet' Stats_ and _TOTAL INFERENCE TIME_. Note that on Jetson we don't recommend setting values too high: for instance, on a device like a Jetson Xavier AGX we don't recommend setting the number higher than 6. Values in the range 1-3 are optimal.
+
+While trying out different values, note how they affect total inference time as well as some inference statistics (like queue and compute times).
+
+## Demonstration case 2: Dynamic batching
+
+For models that support batching, Triton implements multiple scheduling and batching algorithms that combine individual inference requests together to improve inference throughput. In this example, we want to demonstrate how enabling automatic dynamic batching affects inference performance.
+
+### Running the sample
+
+To observe the effect of dynamic batching, from the `concurrency_and_dynamic_batching` directory execute:
+
+```shell
+LD_LIBRARY_PATH=$HOME/tritonserver/lib ./people_detection -m system -v -r $(pwd)/trtis_model_repo_sample_2 -t 6 -s false -p $HOME/tritonserver
+```
+
+### Expected output
+
+Take a look at _Model 'peoplenet' Stats_ and _TOTAL INFERENCE TIME_ to see the effect of dynamic batching. A possible outcome looks like this:
+
+```json
+{
+ "model_stats":[
+ {
+ "name":"peoplenet",
+ "version":"1",
+ "last_inference":1626447787832,
+ "inference_count":6,
+ "execution_count":2,
+ "inference_stats":{
+ "success":{
+ "count":6,
+ "ns":558981051
+ },
+ "fail":{
+ "count":0,
+ "ns":0
+ },
+ "queue":{
+ "count":6,
+ "ns":49271380
+ },
+ "compute_input":{
+ "count":6,
+ "ns":170634044
+ },
+ "compute_infer":{
+ "count":6,
+ "ns":338079193
+ },
+ "compute_output":{
+ "count":6,
+ "ns":950544
+ }
+ },
+ "batch_stats":[
+ {
+ "batch_size":1,
+ "compute_input":{
+ "count":1,
+ "ns":15955684
+ },
+ "compute_infer":{
+ "count":1,
+ "ns":29917093
+ },
+ "compute_output":{
+ "count":1,
+ "ns":152264
+ }
+ },
+ {
+ "batch_size":5,
+ "compute_input":{
+ "count":1,
+ "ns":30935672
+ },
+ "compute_infer":{
+ "count":1,
+ "ns":61632420
+ },
+ "compute_output":{
+ "count":1,
+ "ns":159656
+ }
+ }
+ ]
+ }
+ ]
+}
+
+"TOTAL INFERENCE TIME: 162ms"
+```
+
+Notice that this time the model was executed only twice (as indicated by `execution_count`). Also, unlike in the previous example, the `batch_stats` part of the statistics looks different: we see that our model was executed once with `batch = 1` and once with `batch = 5`. This helped decrease the total inference time.
+
+In order to enable dynamic batching, the following is present in the model config `trtis_model_repo_sample_2/peoplenet/config.pbtxt`:
+
+```
+dynamic_batching {
+}
+```
+
+To try further options of the dynamic batcher, see the [documentation](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#dynamic-batcher).
+
+You can also try enabling both concurrent model execution and dynamic batching.
\ No newline at end of file
diff --git a/docs/examples/jetson/concurrency_and_dynamic_batching/capture.jpg b/docs/examples/jetson/concurrency_and_dynamic_batching/capture.jpg
new file mode 100644
index 0000000000..82e2cb38e0
Binary files /dev/null and b/docs/examples/jetson/concurrency_and_dynamic_batching/capture.jpg differ
diff --git a/docs/examples/jetson/concurrency_and_dynamic_batching/common.h b/docs/examples/jetson/concurrency_and_dynamic_batching/common.h
new file mode 100644
index 0000000000..b55c8b71c5
--- /dev/null
+++ b/docs/examples/jetson/concurrency_and_dynamic_batching/common.h
@@ -0,0 +1,106 @@
+// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of NVIDIA CORPORATION nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#pragma once
+
+#include <iostream>
+#include <string>
+
+#include "triton/core/tritonserver.h"
+
+#define RETURN_IF_ERR(X) \
+ do { \
+ TRITONSERVER_Error* err__ = (X); \
+ if (err__ != nullptr) { \
+ return err__; \
+ } \
+ } while (false)
+
+#define RETURN_MSG_IF_ERR(X, MSG) \
+ do { \
+ TRITONSERVER_Error* err__ = (X); \
+ if (err__ != nullptr) { \
+ return TRITONSERVER_ErrorNew( \
+ TRITONSERVER_ErrorCode(err__), \
+ (std::string(MSG) + ": " + TRITONSERVER_ErrorMessage(err__)) \
+ .c_str()); \
+ } \
+ } while (false)
+
+#define GOTO_IF_ERR(X, T) \
+ do { \
+ TRITONSERVER_Error* err__ = (X); \
+ if (err__ != nullptr) { \
+ goto T; \
+ } \
+ } while (false)
+
+#define FAIL(MSG) \
+ do { \
+ std::cerr << "error: " << (MSG) << std::endl; \
+ exit(1); \
+ } while (false)
+
+#define FAIL_IF_ERR(X, MSG) \
+ do { \
+ TRITONSERVER_Error* err__ = (X); \
+ if (err__ != nullptr) { \
+ std::cerr << "error: " << (MSG) << ": " \
+ << TRITONSERVER_ErrorCodeString(err__) << " - " \
+ << TRITONSERVER_ErrorMessage(err__) << std::endl; \
+ TRITONSERVER_ErrorDelete(err__); \
+ exit(1); \
+ } \
+ } while (false)
+
+#define IGNORE_ERR(X) \
+ do { \
+ TRITONSERVER_Error* err__ = (X); \
+ if (err__ != nullptr) { \
+ TRITONSERVER_ErrorDelete(err__); \
+ } \
+ } while (false)
+
+#ifdef TRITON_ENABLE_GPU
+#define FAIL_IF_CUDA_ERR(X, MSG) \
+ do { \
+ cudaError_t err__ = (X); \
+ if (err__ != cudaSuccess) { \
+ std::cerr << "error: " << (MSG) << ": " << cudaGetErrorString(err__) \
+ << std::endl; \
+ exit(1); \
+ } \
+ } while (false)
+#endif // TRITON_ENABLE_GPU
+
+/// Get the integral version from a string, or fail if string does not
+/// represent a valid version.
+///
+/// \param version_string The string version.
+/// \param version Returns the integral version.
+/// \return The error status. Failure if 'version_string' doesn't
+/// convert to valid version.
+TRITONSERVER_Error* GetModelVersionFromString(
+ const std::string& version_string, int64_t* version);
diff --git a/docs/examples/jetson/concurrency_and_dynamic_batching/labels.txt b/docs/examples/jetson/concurrency_and_dynamic_batching/labels.txt
new file mode 100644
index 0000000000..8ae80671d6
--- /dev/null
+++ b/docs/examples/jetson/concurrency_and_dynamic_batching/labels.txt
@@ -0,0 +1,4 @@
+person
+bag
+face
+
diff --git a/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc b/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc
new file mode 100644
index 0000000000..ce22bdcba9
--- /dev/null
+++ b/docs/examples/jetson/concurrency_and_dynamic_batching/people_detection.cc
@@ -0,0 +1,1158 @@
+// Copyright (c) 2021, NVIDIA CORPORATION& AFFILIATES.All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of NVIDIA CORPORATION nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include
+#include
+#include
+
+#include
+#include